summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2014-06-28 17:26:15 +0100
committer Eric Anholt <[email protected]> 2014-08-08 18:59:46 -0700
commit 2e35981d4d625d951328ef5b8f95798112997fb3 (patch)
tree 8e89e57b117c8979ff302660406ebc048438724b
parent 7108c24fd02a76f3efef4ba5d9aefdf0704ab0d8 (diff)
vc4: Add support for SNE/SEQ/SGE/SLT.
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 4
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_defines.h 2
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_disasm.c 34
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c 55
6 files changed, 96 insertions, 11 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 4755ea0db90..4742c54772a 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -254,6 +254,10 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
[TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
[TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
[TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
+ [TGSI_OPCODE_SEQ] = { QOP_SEQ, tgsi_to_qir_alu },
+ [TGSI_OPCODE_SNE] = { QOP_SNE, tgsi_to_qir_alu },
+ [TGSI_OPCODE_SGE] = { QOP_SGE, tgsi_to_qir_alu },
+ [TGSI_OPCODE_SLT] = { QOP_SLT, tgsi_to_qir_alu },
[TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
[TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
[TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index a4bb6cd1fd1..4ee1f018fc5 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -43,6 +43,12 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_FMAX] = { "fmax", 1, 2 },
[QOP_FMINABS] = { "fminabs", 1, 2 },
[QOP_FMAXABS] = { "fmaxabs", 1, 2 },
+
+ [QOP_SEQ] = { "seq", 1, 2 },
+ [QOP_SNE] = { "sne", 1, 2 },
+ [QOP_SGE] = { "sge", 1, 2 },
+ [QOP_SLT] = { "slt", 1, 2 },
+
[QOP_FTOI] = { "ftoi", 1, 1 },
[QOP_RCP] = { "rcp", 1, 1 },
[QOP_RSQ] = { "rsq", 1, 1 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index ae9e1796b90..4263adcddd1 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -50,6 +50,12 @@ enum qop {
QOP_FMAX,
QOP_FMINABS,
QOP_FMAXABS,
+
+ QOP_SEQ,
+ QOP_SNE,
+ QOP_SGE,
+ QOP_SLT,
+
QOP_FTOI,
QOP_RCP,
QOP_RSQ,
diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h
index 13c940c0f8e..bdd5d94708f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_defines.h
+++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h
@@ -223,6 +223,8 @@ enum qpu_pack_a {
#define QPU_COND_MUL_SHIFT 46
#define QPU_COND_MUL_MASK QPU_MASK(48, 46)
+#define QPU_SF ((uint64_t)1 << 45)
+
#define QPU_WADDR_ADD_SHIFT 38
#define QPU_WADDR_ADD_MASK QPU_MASK(43, 38)
#define QPU_WADDR_MUL_SHIFT 32
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index 0aea2970f68..4ec6d9657b7 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -199,6 +199,17 @@ static const char *qpu_pack_a[] = {
[QPU_PACK_A_8D_SAT] = ".8d.sat",
};
+static const char *qpu_condflags[] = { /* printable suffix for each QPU_COND_* value */
+ [QPU_COND_NEVER] = ".never",
+ [QPU_COND_ALWAYS] = "", /* empty string: prints nothing */
+ [QPU_COND_ZS] = ".zs",
+ [QPU_COND_ZC] = ".zc",
+ [QPU_COND_NS] = ".ns",
+ [QPU_COND_NC] = ".nc",
+ [QPU_COND_CS] = ".cs",
+ [QPU_COND_CC] = ".cc",
+};
+
/* Name lookup for the disassembler tables: evaluates to (array)[index], or
 * to "???" when index is outside the table or names an empty slot.  The bound
 * check uses >= so that index == ARRAY_SIZE(array) is never dereferenced.
 * Note that index is evaluated more than once. */
#define DESC(array, index) \
        (((index) >= ARRAY_SIZE(array) || !(array)[index]) ? \
         "???" : (array)[index])
@@ -282,11 +293,15 @@ static void
print_add_op(uint64_t inst)
{
uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
+ uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_ADD);
bool is_mov = (op_add == QPU_A_OR &&
QPU_GET_FIELD(inst, QPU_ADD_A) ==
QPU_GET_FIELD(inst, QPU_ADD_B));
- fprintf(stderr, "%s ", is_mov ? "mov" : DESC(qpu_add_opcodes, op_add));
+ fprintf(stderr, "%s%s%s ",
+ is_mov ? "mov" : DESC(qpu_add_opcodes, op_add),
+ ((inst & QPU_SF) && op_add != QPU_A_NOP) ? ".sf" : "",
+ op_add != QPU_A_NOP ? DESC(qpu_condflags, cond) : "");
print_alu_dst(inst, false);
fprintf(stderr, ", ");
@@ -303,12 +318,17 @@ print_add_op(uint64_t inst)
static void
print_mul_op(uint64_t inst)
{
+ uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL);
+ uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_MUL);
bool is_mov = (op_mul == QPU_M_V8MIN &&
QPU_GET_FIELD(inst, QPU_MUL_A) ==
QPU_GET_FIELD(inst, QPU_MUL_B));
- fprintf(stderr, "%s ", is_mov ? "mov" : DESC(qpu_mul_opcodes, op_mul));
+ fprintf(stderr, "%s%s%s ",
+ is_mov ? "mov" : DESC(qpu_mul_opcodes, op_mul),
+ ((inst & QPU_SF) && op_add == QPU_A_NOP) ? ".sf" : "",
+ op_mul != QPU_M_NOP ? DESC(qpu_condflags, cond) : "");
print_alu_dst(inst, true);
fprintf(stderr, ", ");
@@ -325,12 +345,18 @@ static void
print_load_imm(uint64_t inst)
{
uint32_t imm = inst;
+ uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+ uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+ uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+ uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
fprintf(stderr, "load_imm ");
print_alu_dst(inst, false);
- fprintf(stderr, ", ");
+ fprintf(stderr, "%s, ", (waddr_add != QPU_W_NOP ?
+ DESC(qpu_condflags, cond_add) : ""));
print_alu_dst(inst, true);
- fprintf(stderr, ", ");
+ fprintf(stderr, "%s, ", (waddr_mul != QPU_W_NOP ?
+ DESC(qpu_condflags, cond_mul) : ""));
fprintf(stderr, "0x%08x (%f)", imm, uif(imm));
}
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 330876734d1..d0f7f894182 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -40,6 +40,28 @@ vc4_dump_program(struct qcompile *c)
}
}
+/**
+ * Resolves the case where register allocation put two different operands
+ * of an instruction in the physical register file even though instructions
+ * have only one field for the register file source address.
+ *
+ * If src0 and *src1 both use mux A or B and their addresses differ, a MOV of
+ * *src1 into r5 is stored at insts[*ni] (advancing *ni) and *src1 is
+ * rewritten to r5, which the caller then uses in the instruction instead.
+ */
+static void
+fixup_raddr_conflict(uint64_t *insts, uint32_t *ni,
+ struct qpu_reg src0, struct qpu_reg *src1)
+{
+ if ((src0.mux == QPU_MUX_A || src0.mux == QPU_MUX_B) &&
+ (src1->mux == QPU_MUX_A || src1->mux == QPU_MUX_B) &&
+ src0.addr != src1->addr) {
+ insts[(*ni)++] = qpu_inst(qpu_a_MOV(qpu_r5(), *src1),
+ qpu_m_NOP());
+ *src1 = qpu_r5();
+ }
+}
+
void
vc4_generate_code(struct qcompile *c)
{
@@ -110,6 +132,13 @@ vc4_generate_code(struct qcompile *c)
M(FMUL),
};
+ static const uint32_t compareflags[] = {
+ [QOP_SEQ - QOP_SEQ] = QPU_COND_ZS,
+ [QOP_SNE - QOP_SEQ] = QPU_COND_ZC,
+ [QOP_SLT - QOP_SEQ] = QPU_COND_NS,
+ [QOP_SGE - QOP_SEQ] = QPU_COND_NC,
+ };
+
struct qpu_reg src[4];
for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
int index = qinst->src[i].index;
@@ -184,6 +213,24 @@ vc4_generate_code(struct qcompile *c)
}
break;
+ case QOP_SEQ:
+ case QOP_SNE:
+ case QOP_SGE:
+ case QOP_SLT:
+ fixup_raddr_conflict(insts, &ni, src[0], &src[1]);
+ insts[ni++] = qpu_inst(qpu_a_SUB(qpu_ra(QPU_W_NOP),
+ src[0], src[1]),
+ qpu_m_NOP());
+ insts[ni - 1] |= QPU_SF;
+
+ insts[ni++] = qpu_load_imm_f(dst, 0.0);
+ insts[ni++] = qpu_load_imm_f(dst, 1.0);
+ insts[ni - 1] = ((insts[ni - 1] & ~QPU_COND_ADD_MASK)
+ | QPU_SET_FIELD(compareflags[qinst->op - QOP_SEQ],
+ QPU_COND_ADD));
+
+ break;
+
case QOP_VPM_WRITE:
insts[ni++] = qpu_inst(qpu_a_MOV(qpu_ra(QPU_W_VPM),
src[0]),
@@ -274,13 +321,7 @@ vc4_generate_code(struct qcompile *c)
if (qir_get_op_nsrc(qinst->op) == 1)
src[1] = src[0];
- if ((src[0].mux == QPU_MUX_A || src[0].mux == QPU_MUX_B) &&
- (src[1].mux == QPU_MUX_A || src[1].mux == QPU_MUX_B) &&
- src[0].addr != src[1].addr) {
- insts[ni++] = qpu_inst(qpu_a_MOV(qpu_r5(), src[1]),
- qpu_m_NOP());
- src[1] = qpu_r5();
- }
+ fixup_raddr_conflict(insts, &ni, src[0], &src[1]);
if (translate[qinst->op].is_mul) {
insts[ni++] = qpu_inst(qpu_a_NOP(),