diff options
-rw-r--r-- | src/gallium/drivers/vc4/vc4_opt_algebraic.c | 37 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 140 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.c | 46 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 50 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir_schedule.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qpu_emit.c | 49 |
6 files changed, 128 insertions, 201 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index aea2b9dbe87..b8ce377ff6b 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -145,43 +145,6 @@ qir_opt_algebraic(struct vc4_compile *c) list_for_each_entry(struct qinst, inst, &c->instructions, link) { switch (inst->op) { - case QOP_SEL_X_Y_ZS: - case QOP_SEL_X_Y_ZC: - case QOP_SEL_X_Y_NS: - case QOP_SEL_X_Y_NC: - case QOP_SEL_X_Y_CS: - case QOP_SEL_X_Y_CC: - if (is_zero(c, inst->src[1])) { - /* Replace references to a 0 uniform value - * with the SEL_X_0 equivalent. - */ - dump_from(c, inst); - inst->op -= (QOP_SEL_X_Y_ZS - QOP_SEL_X_0_ZS); - inst->src[1] = c->undef; - progress = true; - dump_to(c, inst); - break; - } - - if (is_zero(c, inst->src[0])) { - /* Replace references to a 0 uniform value - * with the SEL_X_0 equivalent, flipping the - * condition being evaluated since the operand - * order is flipped. - */ - dump_from(c, inst); - inst->op -= QOP_SEL_X_Y_ZS; - inst->op ^= 1; - inst->op += QOP_SEL_X_0_ZS; - inst->src[0] = inst->src[1]; - inst->src[1] = c->undef; - progress = true; - dump_to(c, inst); - break; - } - - break; - case QOP_FMIN: if (is_1f(c, inst->src[1]) && inst->src[0].pack >= QPU_UNPACK_8D_REP && diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 44e89fe64e9..c24aa19e74e 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -275,7 +275,7 @@ qir_srgb_decode(struct vc4_compile *c, struct qreg srgb) qir_uniform_f(c, 2.4)); qir_SF(c, qir_FSUB(c, srgb, qir_uniform_f(c, 0.04045))); - return qir_SEL_X_Y_NS(c, low, high); + return qir_SEL(c, QPU_COND_NS, low, high); } static struct qreg @@ -475,7 +475,8 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) struct qreg normalized = ntq_scale_depth_texture(c, tex); struct qreg depth_output; - struct qreg one = qir_uniform_f(c, 1.0f); + struct qreg u0 = qir_uniform_f(c, 0.0f); + struct qreg u1 = qir_uniform_f(c, 1.0f); if (c->key->tex[unit].compare_mode) { if (has_proj) compare = qir_FMUL(c, compare, proj); @@ -485,31 +486,31 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) depth_output = qir_uniform_f(c, 0.0f); break; case PIPE_FUNC_ALWAYS: - depth_output = one; + depth_output = u1; break; case PIPE_FUNC_EQUAL: qir_SF(c, qir_FSUB(c, compare, normalized)); - depth_output = qir_SEL_X_0_ZS(c, one); + depth_output = qir_SEL(c, QPU_COND_ZS, u1, u0); break; case PIPE_FUNC_NOTEQUAL: qir_SF(c, qir_FSUB(c, compare, normalized)); - depth_output = qir_SEL_X_0_ZC(c, one); + depth_output = qir_SEL(c, QPU_COND_ZC, u1, u0); break; case PIPE_FUNC_GREATER: qir_SF(c, qir_FSUB(c, compare, normalized)); - depth_output = qir_SEL_X_0_NC(c, one); + depth_output = qir_SEL(c, QPU_COND_NC, u1, u0); break; case PIPE_FUNC_GEQUAL: qir_SF(c, qir_FSUB(c, normalized, compare)); - depth_output = qir_SEL_X_0_NS(c, one); + depth_output = qir_SEL(c, QPU_COND_NS, u1, u0); break; case PIPE_FUNC_LESS: qir_SF(c, qir_FSUB(c, compare, normalized)); - depth_output = qir_SEL_X_0_NS(c, one); + depth_output = qir_SEL(c, QPU_COND_NS, u1, u0); break; case PIPE_FUNC_LEQUAL: qir_SF(c, qir_FSUB(c, normalized, compare)); - depth_output = qir_SEL_X_0_NC(c, one); + depth_output = qir_SEL(c, QPU_COND_NC, u1, u0); break; } } else { @@ -553,9 +554,8 @@ ntq_ffract(struct vc4_compile *c, struct qreg src) struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src)); struct qreg diff = qir_FSUB(c, src, trunc); qir_SF(c, diff); - return qir_SEL_X_Y_NS(c, - qir_FADD(c, diff, qir_uniform_f(c, 1.0)), - diff); + return qir_SEL(c, QPU_COND_NS, + qir_FADD(c, diff, qir_uniform_f(c, 1.0)), diff); } /** @@ -572,9 +572,8 @@ ntq_ffloor(struct vc4_compile *c, struct qreg src) */ qir_SF(c, qir_FSUB(c, src, trunc)); - return qir_SEL_X_Y_NS(c, - qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)), - trunc); + return qir_SEL(c, QPU_COND_NS, + qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)), trunc); } /** @@ -591,9 +590,8 @@ ntq_fceil(struct vc4_compile *c, struct qreg src) */ qir_SF(c, qir_FSUB(c, trunc, src)); - return qir_SEL_X_Y_NS(c, - qir_FADD(c, trunc, qir_uniform_f(c, 1.0)), - trunc); + return qir_SEL(c, QPU_COND_NS, + qir_FADD(c, trunc, qir_uniform_f(c, 1.0)), trunc); } static struct qreg @@ -668,10 +666,13 @@ ntq_fcos(struct vc4_compile *c, struct qreg src) static struct qreg ntq_fsign(struct vc4_compile *c, struct qreg src) { + struct qreg t = qir_get_temp(c); + qir_SF(c, src); - return qir_SEL_X_Y_NC(c, - qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0)), - qir_uniform_f(c, -1.0)); + qir_MOV_dest(c, t, qir_uniform_f(c, 0.0)); + qir_MOV_dest(c, t, qir_uniform_f(c, 1.0))->cond = QPU_COND_ZC; + qir_MOV_dest(c, t, qir_uniform_f(c, -1.0))->cond = QPU_COND_NS; + return t; } static void @@ -888,6 +889,56 @@ ntq_emit_ubfe(struct vc4_compile *c, struct qreg base, struct qreg offset, return qir_UNPACK_8_I(c, base, offset_bit / 8); } +static struct qreg +ntq_emit_comparison(struct vc4_compile *c, nir_alu_instr *instr, + struct qreg src0, struct qreg src1) +{ + enum qpu_cond cond; + + switch (instr->op) { + case nir_op_feq: + case nir_op_ieq: + case nir_op_seq: + cond = QPU_COND_ZS; + break; + case nir_op_fne: + case nir_op_ine: + case nir_op_sne: + cond = QPU_COND_ZC; + break; + case nir_op_fge: + case nir_op_ige: + case nir_op_uge: + case nir_op_sge: + cond = QPU_COND_NC; + break; + case nir_op_flt: + case nir_op_ilt: + case nir_op_slt: + cond = QPU_COND_NS; + break; + default: + unreachable("bad ALU op for comparison"); + } + + if (nir_op_infos[instr->op].input_types[0] == nir_type_float) + qir_SF(c, qir_FSUB(c, src0, src1)); + else + qir_SF(c, qir_SUB(c, src0, src1)); + + switch (instr->op) { + case nir_op_seq: + case nir_op_sne: + case nir_op_sge: + case nir_op_slt: + return qir_SEL(c, cond, + qir_uniform_f(c, 1.0), qir_uniform_f(c, 0.0)); + default: + return qir_SEL(c, cond, + qir_uniform_ui(c, ~0), qir_uniform_ui(c, 0.0)); + } +} + static void ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) { @@ -974,7 +1025,9 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) case nir_op_i2b: case nir_op_f2b: qir_SF(c, src[0]); - *dest = qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0)); + *dest = qir_SEL(c, QPU_COND_ZC, + qir_uniform_ui(c, ~0), + qir_uniform_ui(c, 0)); break; case nir_op_iadd: @@ -1016,65 +1069,28 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) break; case nir_op_seq: - qir_SF(c, qir_FSUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_ZS(c, qir_uniform_f(c, 1.0)); - break; case nir_op_sne: - qir_SF(c, qir_FSUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0)); - break; case nir_op_sge: - qir_SF(c, qir_FSUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_NC(c, qir_uniform_f(c, 1.0)); - break; case nir_op_slt: - qir_SF(c, qir_FSUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_NS(c, qir_uniform_f(c, 1.0)); - break; case nir_op_feq: - qir_SF(c, qir_FSUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0)); - break; case nir_op_fne: - qir_SF(c, qir_FSUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0)); - break; case nir_op_fge: - qir_SF(c, qir_FSUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0)); - break; case nir_op_flt: - qir_SF(c, qir_FSUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0)); - break; case nir_op_ieq: - qir_SF(c, qir_SUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0)); - break; case nir_op_ine: - qir_SF(c, qir_SUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0)); - break; case nir_op_ige: - qir_SF(c, qir_SUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0)); - break; case nir_op_uge: - qir_SF(c, qir_SUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_CC(c, qir_uniform_ui(c, ~0)); - break; case nir_op_ilt: - qir_SF(c, qir_SUB(c, src[0], src[1])); - *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0)); + *dest = ntq_emit_comparison(c, instr, src[0], src[1]); break; case nir_op_bcsel: qir_SF(c, src[0]); - *dest = qir_SEL_X_Y_NS(c, src[1], src[2]); + *dest = qir_SEL(c, QPU_COND_NS, src[1], src[2]); break; case nir_op_fcsel: qir_SF(c, src[0]); - *dest = qir_SEL_X_Y_ZC(c, src[1], src[2]); + *dest = qir_SEL(c, QPU_COND_ZC, src[1], src[2]); break; case nir_op_frcp: diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index a46fb4fd3b8..efbb69b71a7 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -65,19 +65,6 @@ static const struct qir_op_info qir_op_info[] = { [QOP_XOR] = { "xor", 1, 2 }, [QOP_NOT] = { "not", 1, 1 }, - [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1, false, true }, - [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true }, - [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true }, - [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true }, - [QOP_SEL_X_0_CS] = { "fsel_x_0_cs", 1, 1, false, true }, - [QOP_SEL_X_0_CC] = { "fsel_x_0_cc", 1, 1, false, true }, - [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true }, - [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true }, - [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true }, - [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true }, - [QOP_SEL_X_Y_CS] = { "fsel_x_y_cs", 1, 2, false, true }, - [QOP_SEL_X_Y_CC] = { "fsel_x_y_cc", 1, 2, false, true }, - [QOP_RCP] = { "rcp", 1, 1, false, true }, [QOP_RSQ] = { "rsq", 1, 1, false, true }, [QOP_EXP2] = { "exp2", 1, 2, false, true }, @@ -219,23 +206,8 @@ qir_is_tex(struct qinst *inst) bool qir_depends_on_flags(struct qinst *inst) { - switch (inst->op) { - case QOP_SEL_X_0_NS: - case QOP_SEL_X_0_NC: - case QOP_SEL_X_0_ZS: - case QOP_SEL_X_0_ZC: - case QOP_SEL_X_0_CS: - case QOP_SEL_X_0_CC: - case QOP_SEL_X_Y_NS: - case QOP_SEL_X_Y_NC: - case QOP_SEL_X_Y_ZS: - case QOP_SEL_X_Y_ZC: - case QOP_SEL_X_Y_CS: - case QOP_SEL_X_Y_CC: - return true; - default: - return false; - } + return (inst->cond != QPU_COND_ALWAYS && + inst->cond != QPU_COND_NEVER); } bool @@ -292,8 +264,19 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write) void qir_dump_inst(struct vc4_compile *c, struct qinst *inst) { - fprintf(stderr, "%s%s ", + static const char *conditions[] = { + [QPU_COND_ALWAYS] = "", + [QPU_COND_NEVER] = ".never", + [QPU_COND_ZS] = ".zs", + [QPU_COND_ZC] = ".zc", + [QPU_COND_NS] = ".ns", + [QPU_COND_NC] = ".nc", + [QPU_COND_CS] = ".cs", + [QPU_COND_CC] = ".cc", + }; + fprintf(stderr, "%s%s%s ", qir_get_op_name(inst->op), + conditions[inst->cond], inst->sf ? ".sf" : ""); qir_print_reg(c, inst->dst, true); @@ -352,6 +335,7 @@ qir_inst(enum qop op, struct qreg dst, struct qreg src0, struct qreg src1) inst->src = calloc(2, sizeof(inst->src[0])); inst->src[0] = src0; inst->src[1] = src1; + inst->cond = QPU_COND_ALWAYS; return inst; } diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index b0fbb4c1db2..9dad80dddff 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -93,23 +93,6 @@ enum qop { QOP_XOR, QOP_NOT, - /* Note: Orderings of these compares must be the same as in - * qpu_defines.h. Selects the src[0] if the ns flag bit is set, - * otherwise 0. */ - QOP_SEL_X_0_ZS, - QOP_SEL_X_0_ZC, - QOP_SEL_X_0_NS, - QOP_SEL_X_0_NC, - QOP_SEL_X_0_CS, - QOP_SEL_X_0_CC, - /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ - QOP_SEL_X_Y_ZS, - QOP_SEL_X_Y_ZC, - QOP_SEL_X_Y_NS, - QOP_SEL_X_Y_NC, - QOP_SEL_X_Y_CS, - QOP_SEL_X_Y_CC, - QOP_FTOI, QOP_ITOF, QOP_RCP, @@ -170,6 +153,7 @@ struct qinst { struct qreg dst; struct qreg *src; bool sf; + uint8_t cond; }; enum qstage { @@ -463,9 +447,11 @@ void qir_schedule_instructions(struct vc4_compile *c); void qir_reorder_uniforms(struct vc4_compile *c); void qir_emit(struct vc4_compile *c, struct qinst *inst); -static inline void qir_emit_nodef(struct vc4_compile *c, struct qinst *inst) +static inline struct qinst * +qir_emit_nodef(struct vc4_compile *c, struct qinst *inst) { list_addtail(&inst->link, &c->instructions); + return inst; } struct qreg qir_get_temp(struct vc4_compile *c); @@ -536,11 +522,12 @@ qir_##name(struct vc4_compile *c, struct qreg a) \ qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \ return t; \ } \ -static inline void \ +static inline struct qinst * \ qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \ struct qreg a) \ { \ - qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef)); \ + return qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, \ + c->undef)); \ } #define QIR_ALU2(name) \ @@ -592,18 +579,6 @@ QIR_ALU2(V8MAX) QIR_ALU2(V8ADDS) QIR_ALU2(V8SUBS) QIR_ALU2(MUL24) -QIR_ALU1(SEL_X_0_ZS) -QIR_ALU1(SEL_X_0_ZC) -QIR_ALU1(SEL_X_0_NS) -QIR_ALU1(SEL_X_0_NC) -QIR_ALU1(SEL_X_0_CS) -QIR_ALU1(SEL_X_0_CC) -QIR_ALU2(SEL_X_Y_ZS) -QIR_ALU2(SEL_X_Y_ZC) -QIR_ALU2(SEL_X_Y_NS) -QIR_ALU2(SEL_X_Y_NC) -QIR_ALU2(SEL_X_Y_CS) -QIR_ALU2(SEL_X_Y_CC) QIR_ALU2(FMIN) QIR_ALU2(FMAX) QIR_ALU2(FMINABS) @@ -648,6 +623,17 @@ QIR_NODST_1(TLB_STENCIL_SETUP) QIR_NODST_1(MS_MASK) static inline struct qreg +qir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1) +{ + struct qreg t = qir_get_temp(c); + struct qinst *a = qir_MOV_dest(c, t, src0); + struct qinst *b = qir_MOV_dest(c, t, src1); + a->cond = cond; + b->cond = cond ^ 1; + return t; +} + +static inline struct qreg qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) { struct qreg t = qir_FMOV(c, src); diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c index d20815f055e..2f280c54523 100644 --- a/src/gallium/drivers/vc4/vc4_qir_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c @@ -250,12 +250,11 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n) else if (inst->dst.file == QFILE_TEMP) add_write_dep(dir, &state->last_temp_write[inst->dst.index], n); + if (qir_depends_on_flags(inst)) + add_dep(dir, state->last_sf, n); + if (inst->sf) add_write_dep(dir, &state->last_sf, n); - - if (qir_depends_on_flags(inst)) { - add_dep(dir, state->last_sf, n); - } } static void diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index cb4e0cfcc3f..b06702afea2 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -64,6 +64,12 @@ set_last_cond_add(struct vc4_compile *c, uint32_t cond) *last_inst(c) = qpu_set_cond_add(*last_inst(c), cond); } +static void +set_last_cond_mul(struct vc4_compile *c, uint32_t cond) +{ + *last_inst(c) = qpu_set_cond_mul(*last_inst(c), cond); +} + /** * Some special registers can be read from either file, which lets us resolve * raddr conflicts without extra MOVs. @@ -306,42 +312,9 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; } - switch (qinst->op) { - case QOP_SEL_X_0_ZS: - case QOP_SEL_X_0_ZC: - case QOP_SEL_X_0_NS: - case QOP_SEL_X_0_NC: - case QOP_SEL_X_0_CS: - case QOP_SEL_X_0_CC: - queue(c, qpu_a_MOV(dst, src[0]) | unpack); - set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS + - QPU_COND_ZS); - - queue(c, qpu_a_XOR(dst, qpu_r0(), qpu_r0())); - set_last_cond_add(c, ((qinst->op - QOP_SEL_X_0_ZS) ^ - 1) + QPU_COND_ZS); - break; - - case QOP_SEL_X_Y_ZS: - case QOP_SEL_X_Y_ZC: - case QOP_SEL_X_Y_NS: - case QOP_SEL_X_Y_NC: - case QOP_SEL_X_Y_CS: - case QOP_SEL_X_Y_CC: - queue(c, qpu_a_MOV(dst, src[0])); - if (qinst->src[0].pack) - *(last_inst(c)) |= unpack; - set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS + - QPU_COND_ZS); - - queue(c, qpu_a_MOV(dst, src[1])); - if (qinst->src[1].pack) - *(last_inst(c)) |= unpack; - set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^ - 1) + QPU_COND_ZS); - - break; + bool handled_qinst_cond = true; + switch (qinst->op) { case QOP_RCP: case QOP_RSQ: case QOP_EXP2: @@ -497,16 +470,22 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) queue(c, qpu_m_alu2(translate[qinst->op].op, dst, src[0], src[1]) | unpack); + set_last_cond_mul(c, qinst->cond); } else { queue(c, qpu_a_alu2(translate[qinst->op].op, dst, src[0], src[1]) | unpack); + set_last_cond_add(c, qinst->cond); } + handled_qinst_cond = true; set_last_dst_pack(c, qinst); break; } + assert(qinst->cond == QPU_COND_ALWAYS || + handled_qinst_cond); + if (qinst->sf) { assert(!qir_is_multi_instruction(qinst)); *last_inst(c) |= QPU_SF; |