summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/vc4/vc4_opt_algebraic.c 9
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 104
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 14
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 37
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c 63
5 files changed, 148 insertions, 79 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index 2bf474ccef9..f8ed6218adc 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -45,9 +45,12 @@ qir_opt_algebraic(struct qcompile *c)
struct qinst *inst = (struct qinst *)node;
switch (inst->op) {
- case QOP_CMP:
- /* Turn "dst = (a < 0) ? b : b)" into "dst = b" */
- if (qir_reg_equals(inst->src[1], inst->src[2])) {
+ case QOP_SEL_X_Y_ZS:
+ case QOP_SEL_X_Y_ZC:
+ case QOP_SEL_X_Y_NS:
+ case QOP_SEL_X_Y_NC:
+ /* Turn "dst = (sf == x) ? a : a" into "dst = a" */
+ if (qir_reg_equals(inst->src[0], inst->src[1])) {
if (debug) {
fprintf(stderr, "optimizing: ");
qir_dump_inst(inst);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index d404047e4bb..aaa7eb346f3 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -249,6 +249,58 @@ tgsi_to_qir_alu(struct tgsi_to_qir *trans,
}
static struct qreg
+tgsi_to_qir_seq(struct tgsi_to_qir *trans,
+ struct tgsi_full_instruction *tgsi_inst,
+ enum qop op, struct qreg *src, int i)
+{
+ struct qcompile *c = trans->c;
+ qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+ return qir_SEL_X_0_ZS(c, qir_uniform_f(trans, 1.0));
+}
+
+static struct qreg
+tgsi_to_qir_sne(struct tgsi_to_qir *trans,
+ struct tgsi_full_instruction *tgsi_inst,
+ enum qop op, struct qreg *src, int i)
+{
+ struct qcompile *c = trans->c;
+ qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+ return qir_SEL_X_0_ZC(c, qir_uniform_f(trans, 1.0));
+}
+
+static struct qreg
+tgsi_to_qir_slt(struct tgsi_to_qir *trans,
+ struct tgsi_full_instruction *tgsi_inst,
+ enum qop op, struct qreg *src, int i)
+{
+ struct qcompile *c = trans->c;
+ qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+ return qir_SEL_X_0_NS(c, qir_uniform_f(trans, 1.0));
+}
+
+static struct qreg
+tgsi_to_qir_sge(struct tgsi_to_qir *trans,
+ struct tgsi_full_instruction *tgsi_inst,
+ enum qop op, struct qreg *src, int i)
+{
+ struct qcompile *c = trans->c;
+ qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+ return qir_SEL_X_0_NC(c, qir_uniform_f(trans, 1.0));
+}
+
+static struct qreg
+tgsi_to_qir_cmp(struct tgsi_to_qir *trans,
+ struct tgsi_full_instruction *tgsi_inst,
+ enum qop op, struct qreg *src, int i)
+{
+ struct qcompile *c = trans->c;
+ qir_SF(c, src[0 * 4 + i]);
+ return qir_SEL_X_Y_NS(c,
+ src[1 * 4 + i],
+ src[2 * 4 + i]);
+}
+
+static struct qreg
tgsi_to_qir_mad(struct tgsi_to_qir *trans,
struct tgsi_full_instruction *tgsi_inst,
enum qop op, struct qreg *src, int i)
@@ -280,16 +332,15 @@ tgsi_to_qir_lit(struct tgsi_to_qir *trans,
case 2: {
struct qreg zero = qir_uniform_f(trans, 0.0);
+ qir_SF(c, x);
/* XXX: Clamp w to -128..128 */
- return qir_CMP(c,
- x,
- zero,
- qir_EXP2(c, qir_FMUL(c,
- w,
- qir_LOG2(c,
- qir_FMAX(c,
- y,
- zero)))));
+ return qir_SEL_X_0_NC(c,
+ qir_EXP2(c, qir_FMUL(c,
+ w,
+ qir_LOG2(c,
+ qir_FMAX(c,
+ y,
+ zero)))));
}
default:
assert(!"not reached");
@@ -415,10 +466,10 @@ tgsi_to_qir_frc(struct tgsi_to_qir *trans,
struct qcompile *c = trans->c;
struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src[0 * 4 + i]));
struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc);
- return qir_CMP(c,
- diff,
- qir_FADD(c, diff, qir_uniform_f(trans, 1.0)),
- diff);
+ qir_SF(c, diff);
+ return qir_SEL_X_Y_NS(c,
+ qir_FADD(c, diff, qir_uniform_f(trans, 1.0)),
+ diff);
}
/**
@@ -436,12 +487,11 @@ tgsi_to_qir_flr(struct tgsi_to_qir *trans,
/* This will be < 0 if we truncated and the truncation was of a value
* that was < 0 in the first place.
*/
- struct qreg diff = qir_FSUB(c, src[0 * 4 + i], trunc);
+ qir_SF(c, qir_FSUB(c, src[0 * 4 + i], trunc));
- return qir_CMP(c,
- diff,
- qir_FSUB(c, trunc, qir_uniform_f(trans, 1.0)),
- trunc);
+ return qir_SEL_X_Y_NS(c,
+ qir_FSUB(c, trunc, qir_uniform_f(trans, 1.0)),
+ trunc);
}
static struct qreg
@@ -613,10 +663,10 @@ tgsi_to_qir_kill_if(struct tgsi_to_qir *trans, struct qreg *src, int i)
if (trans->discard.file == QFILE_NULL)
trans->discard = qir_uniform_f(trans, 0.0);
- trans->discard = qir_CMP(c,
- src[0 * 4 + i],
- qir_uniform_f(trans, 1.0),
- trans->discard);
+ qir_SF(c, src[0 * 4 + i]);
+ trans->discard = qir_SEL_X_Y_NS(c,
+ qir_uniform_f(trans, 1.0),
+ trans->discard);
}
static void
@@ -705,11 +755,11 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
[TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
[TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
[TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
- [TGSI_OPCODE_SEQ] = { QOP_SEQ, tgsi_to_qir_alu },
- [TGSI_OPCODE_SNE] = { QOP_SNE, tgsi_to_qir_alu },
- [TGSI_OPCODE_SGE] = { QOP_SGE, tgsi_to_qir_alu },
- [TGSI_OPCODE_SLT] = { QOP_SLT, tgsi_to_qir_alu },
- [TGSI_OPCODE_CMP] = { QOP_CMP, tgsi_to_qir_alu },
+ [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
+ [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
+ [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
+ [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt },
+ [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
[TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
[TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
[TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 0b0d2c11cf1..72149908422 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -43,11 +43,15 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_FMINABS] = { "fminabs", 1, 2 },
[QOP_FMAXABS] = { "fmaxabs", 1, 2 },
- [QOP_SEQ] = { "seq", 1, 2 },
- [QOP_SNE] = { "sne", 1, 2 },
- [QOP_SGE] = { "sge", 1, 2 },
- [QOP_SLT] = { "slt", 1, 2 },
- [QOP_CMP] = { "cmp", 1, 3 },
+ [QOP_SF] = { "sf", 0, 1 },
+ [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1 },
+ [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1 },
+ [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1 },
+ [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1 },
+ [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2 },
+ [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2 },
+ [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2 },
+ [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2 },
[QOP_FTOI] = { "ftoi", 1, 1 },
[QOP_ITOF] = { "itof", 1, 1 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 77b5f1af903..99df99c1a07 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -54,11 +54,21 @@ enum qop {
QOP_FMINABS,
QOP_FMAXABS,
- QOP_SEQ,
- QOP_SNE,
- QOP_SGE,
- QOP_SLT,
- QOP_CMP,
+ /* Sets the flag register according to src. */
+ QOP_SF,
+
+ /* Note: Orderings of these compares must be the same as in
+ * qpu_defines.h. Selects src[0] if the flag condition named by
+ * the suffix (zs/zc/ns/nc) holds, otherwise 0. */
+ QOP_SEL_X_0_ZS,
+ QOP_SEL_X_0_ZC,
+ QOP_SEL_X_0_NS,
+ QOP_SEL_X_0_NC,
+ /* Selects src[0] if the suffix's flag condition (zs/zc/ns/nc) holds, otherwise src[1]. */
+ QOP_SEL_X_Y_ZS,
+ QOP_SEL_X_Y_ZC,
+ QOP_SEL_X_Y_NS,
+ QOP_SEL_X_Y_NC,
QOP_FTOI,
QOP_ITOF,
@@ -260,6 +270,15 @@ QIR_ALU1(MOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
QIR_ALU2(FMUL)
+QIR_NODST_1(SF)
+QIR_ALU1(SEL_X_0_ZS)
+QIR_ALU1(SEL_X_0_ZC)
+QIR_ALU1(SEL_X_0_NS)
+QIR_ALU1(SEL_X_0_NC)
+QIR_ALU2(SEL_X_Y_ZS)
+QIR_ALU2(SEL_X_Y_ZC)
+QIR_ALU2(SEL_X_Y_NS)
+QIR_ALU2(SEL_X_Y_NC)
QIR_ALU2(FMIN)
QIR_ALU2(FMAX)
QIR_ALU2(FMINABS)
@@ -284,14 +303,6 @@ QIR_ALU0(FRAG_RCP_W)
QIR_NODST_1(TLB_DISCARD_SETUP)
static inline struct qreg
-qir_CMP(struct qcompile *c, struct qreg cmp, struct qreg a, struct qreg b)
-{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst4(QOP_CMP, t, cmp, a, b, c->undef));
- return t;
-}
-
-static inline struct qreg
qir_R4_UNPACK(struct qcompile *c, int i)
{
struct qreg t = qir_get_temp(c);
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 4e28ff7c3b8..6d2c34f2d1f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -60,6 +60,12 @@ last_inst(struct qcompile *c)
return &q->inst;
}
+static void
+set_last_cond_add(struct qcompile *c, uint32_t cond)
+{
+ *last_inst(c) = qpu_set_cond_add(*last_inst(c), cond);
+}
+
/**
* This is used to resolve the fact that we might register-allocate two
* different operands of an instruction to the same physical register file
@@ -278,13 +284,6 @@ vc4_generate_code(struct qcompile *c)
M(FMUL),
};
- static const uint32_t compareflags[] = {
- [QOP_SEQ - QOP_SEQ] = QPU_COND_ZS,
- [QOP_SNE - QOP_SEQ] = QPU_COND_ZC,
- [QOP_SLT - QOP_SEQ] = QPU_COND_NS,
- [QOP_SGE - QOP_SEQ] = QPU_COND_NC,
- };
-
struct qpu_reg src[4];
for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
int index = qinst->src[i].index;
@@ -365,32 +364,36 @@ vc4_generate_code(struct qcompile *c)
}
break;
- case QOP_CMP:
+ case QOP_SF:
+ fixup_raddr_conflict(c, src[0], &src[1]);
queue(c, qpu_a_MOV(qpu_ra(QPU_W_NOP), src[0]));
*last_inst(c) |= QPU_SF;
-
- queue(c, qpu_a_MOV(dst, src[1]));
- *last_inst(c) = qpu_set_cond_add(*last_inst(c),
- QPU_COND_NS);
-
- queue(c, qpu_a_MOV(dst, src[2]));
- *last_inst(c) = qpu_set_cond_add(*last_inst(c),
- QPU_COND_NC);
break;
- case QOP_SEQ:
- case QOP_SNE:
- case QOP_SGE:
- case QOP_SLT:
- fixup_raddr_conflict(c, src[0], &src[1]);
- queue(c, qpu_a_FSUB(qpu_ra(QPU_W_NOP), src[0], src[1]));
- *last_inst(c) |= QPU_SF;
+ case QOP_SEL_X_0_ZS:
+ case QOP_SEL_X_0_ZC:
+ case QOP_SEL_X_0_NS:
+ case QOP_SEL_X_0_NC:
+ queue(c, qpu_a_MOV(dst, src[0]));
+ set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
+ QPU_COND_ZS);
+
+ queue(c, qpu_a_XOR(dst, qpu_r0(), qpu_r0()));
+ set_last_cond_add(c, ((qinst->op - QOP_SEL_X_0_ZS) ^
+ 1) + QPU_COND_ZS);
+ break;
- queue(c, qpu_load_imm_f(dst, 0.0));
- queue(c, qpu_load_imm_f(dst, 1.0));
- *last_inst(c) = qpu_set_cond_add(*last_inst(c),
- compareflags[qinst->op - QOP_SEQ]);
+ case QOP_SEL_X_Y_ZS:
+ case QOP_SEL_X_Y_ZC:
+ case QOP_SEL_X_Y_NS:
+ case QOP_SEL_X_Y_NC:
+ queue(c, qpu_a_MOV(dst, src[0]));
+ set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
+ QPU_COND_ZS);
+ queue(c, qpu_a_MOV(dst, src[1]));
+ set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
+ 1) + QPU_COND_ZS);
break;
@@ -475,8 +478,7 @@ vc4_generate_code(struct qcompile *c)
queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
qpu_rb(QPU_R_FRAG_PAYLOAD_ZW)));
if (discard) {
- *last_inst(c) = qpu_set_cond_add(*last_inst(c),
- QPU_COND_ZS);
+ set_last_cond_add(c, QPU_COND_ZS);
}
break;
@@ -490,8 +492,7 @@ vc4_generate_code(struct qcompile *c)
case QOP_TLB_COLOR_WRITE:
queue(c, qpu_a_MOV(qpu_tlbc(), src[0]));
if (discard) {
- *last_inst(c) = qpu_set_cond_add(*last_inst(c),
- QPU_COND_ZS);
+ set_last_cond_add(c, QPU_COND_ZS);
}
break;