diff options
-rw-r--r-- | src/gallium/drivers/vc4/vc4_opt_algebraic.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 104 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 37 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qpu_emit.c | 63 |
5 files changed, 148 insertions, 79 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index 2bf474ccef9..f8ed6218adc 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -45,9 +45,12 @@ qir_opt_algebraic(struct qcompile *c) struct qinst *inst = (struct qinst *)node; switch (inst->op) { - case QOP_CMP: - /* Turn "dst = (a < 0) ? b : b)" into "dst = b" */ - if (qir_reg_equals(inst->src[1], inst->src[2])) { + case QOP_SEL_X_Y_ZS: + case QOP_SEL_X_Y_ZC: + case QOP_SEL_X_Y_NS: + case QOP_SEL_X_Y_NC: + /* Turn "dst = (sf == x) ? a : a)" into "dst = a" */ + if (qir_reg_equals(inst->src[0], inst->src[1])) { if (debug) { fprintf(stderr, "optimizing: "); qir_dump_inst(inst); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index d404047e4bb..aaa7eb346f3 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -249,6 +249,58 @@ tgsi_to_qir_alu(struct tgsi_to_qir *trans, } static struct qreg +tgsi_to_qir_seq(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_ZS(c, qir_uniform_f(trans, 1.0)); +} + +static struct qreg +tgsi_to_qir_sne(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_ZC(c, qir_uniform_f(trans, 1.0)); +} + +static struct qreg +tgsi_to_qir_slt(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_NS(c, qir_uniform_f(trans, 1.0)); +} + +static struct qreg +tgsi_to_qir_sge(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_NC(c, qir_uniform_f(trans, 1.0)); +} + +static struct qreg +tgsi_to_qir_cmp(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, src[0 * 4 + i]); + return qir_SEL_X_Y_NS(c, + src[1 * 4 + i], + src[2 * 4 + i]); +} + +static struct qreg tgsi_to_qir_mad(struct tgsi_to_qir *trans, struct tgsi_full_instruction *tgsi_inst, enum qop op, struct qreg *src, int i) @@ -280,16 +332,15 @@ tgsi_to_qir_lit(struct tgsi_to_qir *trans, case 2: { struct qreg zero = qir_uniform_f(trans, 0.0); + qir_SF(c, x); /* XXX: Clamp w to -128..128 */ - return qir_CMP(c, - x, - zero, - qir_EXP2(c, qir_FMUL(c, - w, - qir_LOG2(c, - qir_FMAX(c, - y, - zero))))); + return qir_SEL_X_0_NC(c, + qir_EXP2(c, qir_FMUL(c, + w, + qir_LOG2(c, + qir_FMAX(c, + y, + zero))))); } default: assert(!"not reached"); @@ -415,10 +466,10 @@ tgsi_to_qir_frc(struct tgsi_to_qir *trans, struct qcompile *c = trans->c; struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i])); struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc); - return qir_CMP(c, - diff, - qir_FADD(c, diff, qir_uniform_f(trans, 1.0)), - diff); + qir_SF(c, diff); + return qir_SEL_X_Y_NS(c, + qir_FADD(c, diff, qir_uniform_f(trans, 1.0)), + diff); } /** @@ -436,12 +487,11 @@ tgsi_to_qir_flr(struct tgsi_to_qir *trans, /* This will be < 0 if we truncated and the truncation was of a value * that was < 0 in the first place. */ - struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc); + qir_SF(c, qir_FSUB(c, src[0 * 4 + i], trunc)); - return qir_CMP(c, - diff, - qir_FSUB(c, trunc, qir_uniform_f(trans, 1.0)), - trunc); + return qir_SEL_X_Y_NS(c, + qir_FSUB(c, trunc, qir_uniform_f(trans, 1.0)), + trunc); } static struct qreg @@ -613,10 +663,10 @@ tgsi_to_qir_kill_if(struct tgsi_to_qir *trans, struct qreg *src, int i) if (trans->discard.file == QFILE_NULL) trans->discard = qir_uniform_f(trans, 0.0); - trans->discard = qir_CMP(c, - src[0 * 4 + i], - qir_uniform_f(trans, 1.0), - trans->discard); + qir_SF(c, src[0 * 4 + i]); + trans->discard = qir_SEL_X_Y_NS(c, + qir_uniform_f(trans, 1.0), + trans->discard); } static void @@ -705,11 +755,11 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans, [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu }, [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu }, [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu }, - [TGSI_OPCODE_SEQ] = { QOP_SEQ, tgsi_to_qir_alu }, - [TGSI_OPCODE_SNE] = { QOP_SNE, tgsi_to_qir_alu }, - [TGSI_OPCODE_SGE] = { QOP_SGE, tgsi_to_qir_alu }, - [TGSI_OPCODE_SLT] = { QOP_SLT, tgsi_to_qir_alu }, - [TGSI_OPCODE_CMP] = { QOP_CMP, tgsi_to_qir_alu }, + [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq }, + [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne }, + [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge }, + [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt }, + [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp }, [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad }, [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 }, [TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 }, diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 0b0d2c11cf1..72149908422 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -43,11 +43,15 @@ static const struct qir_op_info qir_op_info[] = { [QOP_FMINABS] = { "fminabs", 1, 2 }, [QOP_FMAXABS] = { "fmaxabs", 1, 2 }, - [QOP_SEQ] = { "seq", 1, 2 }, - [QOP_SNE] = { "sne", 1, 2 }, - [QOP_SGE] = { "sge", 1, 2 }, - [QOP_SLT] = { "slt", 1, 2 }, - [QOP_CMP] = { "cmp", 1, 3 }, + [QOP_SF] = { "sf", 0, 1 }, + [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1 }, + [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1 }, + [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1 }, + [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1 }, + [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2 }, + [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2 }, + [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2 }, + [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2 }, [QOP_FTOI] = { "ftoi", 1, 1 }, [QOP_ITOF] = { "itof", 1, 1 }, diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 77b5f1af903..99df99c1a07 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -54,11 +54,21 @@ enum qop { QOP_FMINABS, QOP_FMAXABS, - QOP_SEQ, - QOP_SNE, - QOP_SGE, - QOP_SLT, - QOP_CMP, + /* Sets the flag register according to src. */ + QOP_SF, + + /* Note: Orderings of these compares must be the same as in + * qpu_defines.h. Selects the src[0] if the ns flag bit is set, + * otherwise 0. */ + QOP_SEL_X_0_ZS, + QOP_SEL_X_0_ZC, + QOP_SEL_X_0_NS, + QOP_SEL_X_0_NC, + /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ + QOP_SEL_X_Y_ZS, + QOP_SEL_X_Y_ZC, + QOP_SEL_X_Y_NS, + QOP_SEL_X_Y_NC, QOP_FTOI, QOP_ITOF, @@ -260,6 +270,15 @@ QIR_ALU1(MOV) QIR_ALU2(FADD) QIR_ALU2(FSUB) QIR_ALU2(FMUL) +QIR_NODST_1(SF) +QIR_ALU1(SEL_X_0_ZS) +QIR_ALU1(SEL_X_0_ZC) +QIR_ALU1(SEL_X_0_NS) +QIR_ALU1(SEL_X_0_NC) +QIR_ALU2(SEL_X_Y_ZS) +QIR_ALU2(SEL_X_Y_ZC) +QIR_ALU2(SEL_X_Y_NS) +QIR_ALU2(SEL_X_Y_NC) QIR_ALU2(FMIN) QIR_ALU2(FMAX) QIR_ALU2(FMINABS) @@ -284,14 +303,6 @@ QIR_ALU0(FRAG_RCP_W) QIR_NODST_1(TLB_DISCARD_SETUP) static inline struct qreg -qir_CMP(struct qcompile *c, struct qreg cmp, struct qreg a, struct qreg b) -{ - struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst4(QOP_CMP, t, cmp, a, b, c->undef)); - return t; -} - -static inline struct qreg qir_R4_UNPACK(struct qcompile *c, int i) { struct qreg t = qir_get_temp(c); diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 4e28ff7c3b8..6d2c34f2d1f 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -60,6 +60,12 @@ last_inst(struct qcompile *c) return &q->inst; } +static void +set_last_cond_add(struct qcompile *c, uint32_t cond) +{ + *last_inst(c) = qpu_set_cond_add(*last_inst(c), cond); +} + /** * This is used to resolve the fact that we might register-allocate two * different operands of an instruction to the same physical register file @@ -278,13 +284,6 @@ vc4_generate_code(struct qcompile *c) M(FMUL), }; - static const uint32_t compareflags[] = { - [QOP_SEQ - QOP_SEQ] = QPU_COND_ZS, - [QOP_SNE - QOP_SEQ] = QPU_COND_ZC, - [QOP_SLT - QOP_SEQ] = QPU_COND_NS, - [QOP_SGE - QOP_SEQ] = QPU_COND_NC, - }; - struct qpu_reg src[4]; for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) { int index = qinst->src[i].index; @@ -365,32 +364,36 @@ vc4_generate_code(struct qcompile *c) } break; - case QOP_CMP: + case QOP_SF: + fixup_raddr_conflict(c, src[0], &src[1]); queue(c, qpu_a_MOV(qpu_ra(QPU_W_NOP), src[0])); *last_inst(c) |= QPU_SF; - - queue(c, qpu_a_MOV(dst, src[1])); - *last_inst(c) = qpu_set_cond_add(*last_inst(c), - QPU_COND_NS); - - queue(c, qpu_a_MOV(dst, src[2])); - *last_inst(c) = qpu_set_cond_add(*last_inst(c), - QPU_COND_NC); break; - case QOP_SEQ: - case QOP_SNE: - case QOP_SGE: - case QOP_SLT: - fixup_raddr_conflict(c, src[0], &src[1]); - queue(c, qpu_a_FSUB(qpu_ra(QPU_W_NOP), src[0], src[1])); - *last_inst(c) |= QPU_SF; + case QOP_SEL_X_0_ZS: + case QOP_SEL_X_0_ZC: + case QOP_SEL_X_0_NS: + case QOP_SEL_X_0_NC: + queue(c, qpu_a_MOV(dst, src[0])); + set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS + + QPU_COND_ZS); + + queue(c, qpu_a_XOR(dst, qpu_r0(), qpu_r0())); + set_last_cond_add(c, ((qinst->op - QOP_SEL_X_0_ZS) ^ + 1) + QPU_COND_ZS); + break; - queue(c, qpu_load_imm_f(dst, 0.0)); - queue(c, qpu_load_imm_f(dst, 1.0)); - *last_inst(c) = qpu_set_cond_add(*last_inst(c), - compareflags[qinst->op - QOP_SEQ]); + case QOP_SEL_X_Y_ZS: + case QOP_SEL_X_Y_ZC: + case QOP_SEL_X_Y_NS: + case QOP_SEL_X_Y_NC: + queue(c, qpu_a_MOV(dst, src[0])); + set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS + + QPU_COND_ZS); + queue(c, qpu_a_MOV(dst, src[1])); + set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^ + 1) + QPU_COND_ZS); break; @@ -475,8 +478,7 @@ vc4_generate_code(struct qcompile *c) queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), qpu_rb(QPU_R_FRAG_PAYLOAD_ZW))); if (discard) { - *last_inst(c) = qpu_set_cond_add(*last_inst(c), - QPU_COND_ZS); + set_last_cond_add(c, QPU_COND_ZS); } break; @@ -490,8 +492,7 @@ vc4_generate_code(struct qcompile *c) case QOP_TLB_COLOR_WRITE: queue(c, qpu_a_MOV(qpu_tlbc(), src[0])); if (discard) { - *last_inst(c) = qpu_set_cond_add(*last_inst(c), - QPU_COND_ZS); + set_last_cond_add(c, QPU_COND_ZS); } break; |