summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2016-08-25 12:31:49 -0700
committer: Eric Anholt <[email protected]> 2016-08-25 17:24:11 -0700
commit: 8ce65261789f085e657e6a487db93d38ee6bea63 (patch)
tree: 960a22a83a2a764907e8c89a7f725b75b7827c97 /src/gallium/drivers/vc4
parent: 074f1f3c0c2cd15213a62eb7f589423ece6391c8 (diff)
vc4: Add support for MUL output rotation.
Extracted from a patch by jonasarrow on github.
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- src/gallium/drivers/vc4/vc4_opt_copy_propagation.c | 8
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c | 3
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h | 12
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu.c | 13
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu.h | 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c | 14
6 files changed, 51 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index f8f1365f658..d20ee5e227d 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -90,6 +90,14 @@ try_copy_prop(struct vc4_compile *c, struct qinst *inst, struct qinst **movs)
continue;
}
+ /* Mul rotation's source needs to be in an r0-r3 accumulator,
+ * so no uniforms or regfile-a/r4 unpacking allowed.
+ */
+ if (inst->op == QOP_ROT_MUL &&
+ (mov->src[0].file != QFILE_TEMP ||
+ mov->src[0].pack))
+ continue;
+
uint8_t unpack;
if (mov->src[0].pack) {
/* Make sure that the meaning of the unpack
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 9b4a28ebab6..446af66affd 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -86,6 +86,8 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_LOAD_IMM_U2] = { "load_imm_u2", 0, 1 },
[QOP_LOAD_IMM_I2] = { "load_imm_i2", 0, 1 },
+ [QOP_ROT_MUL] = { "rot_mul", 0, 2 },
+
[QOP_BRANCH] = { "branch", 0, 0, true },
[QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true },
};
@@ -164,6 +166,7 @@ qir_is_mul(struct qinst *inst)
case QOP_V8MAX:
case QOP_V8ADDS:
case QOP_V8SUBS:
+ case QOP_ROT_MUL:
return true;
default:
return false;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 90cc1385043..a82c47c0341 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -168,6 +168,8 @@ enum qop {
*/
QOP_LOAD_IMM_I2,
+ QOP_ROT_MUL,
+
/* Jumps to block->successor[0] if the qinst->cond (as a
* QPU_COND_BRANCH_*) passes, or block->successor[1] if not. Note
* that block->successor[1] may be unset if the condition is ALWAYS.
@@ -822,6 +824,16 @@ qir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val)
c->undef));
}
+/** Emits QOP_ROT_MUL: val's channels rotated right by rot.  The amount is
+ * carried in src[1] as the small immediate QPU_SMALL_IMM_MUL_ROT + rot. */
+static inline struct qreg
+qir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot)
+{
+        struct qreg rot_imm = qir_reg(QFILE_LOAD_IMM,
+                                      QPU_SMALL_IMM_MUL_ROT + rot);
+        return qir_emit_def(c, qir_inst(QOP_ROT_MUL, c->undef, val, rot_imm));
+}
+
static inline void
qir_MOV_cond(struct vc4_compile *c, uint8_t cond,
struct qreg dest, struct qreg src)
diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c
index d022d107eb3..67850a8114a 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -234,6 +234,19 @@ qpu_m_alu2(enum qpu_op_mul op,
return inst;
}
+/* MUL-unit rotate: v8min of src0 with itself, with the small-immediate
+ * signal set and the immediate field holding QPU_SMALL_IMM_MUL_ROT + rot.
+ */
+uint64_t
+qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot)
+{
+        uint64_t rot_inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0);
+
+        rot_inst = QPU_UPDATE_FIELD(rot_inst, QPU_SIG_SMALL_IMM, QPU_SIG);
+        return QPU_UPDATE_FIELD(rot_inst, QPU_SMALL_IMM_MUL_ROT + rot,
+                                QPU_SMALL_IMM);
+}
+
static bool
merge_fields(uint64_t *merge,
uint64_t a, uint64_t b,
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index 437e4f5e5a4..5ec80f05375 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -150,6 +150,7 @@ uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
uint32_t qpu_encode_small_immediate(uint32_t i) ATTRIBUTE_CONST;
+uint64_t qpu_m_rot(struct qpu_reg dst, struct qpu_reg src, int rot) ATTRIBUTE_CONST;
bool qpu_waddr_is_tlb(uint32_t waddr) ATTRIBUTE_CONST;
bool qpu_inst_is_tlb(uint64_t inst) ATTRIBUTE_CONST;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index f5a5b8a862a..79588b3f51c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -434,6 +434,20 @@ vc4_generate_code_block(struct vc4_compile *c,
case QOP_LOAD_IMM_I2:
queue(block, qpu_load_imm_i2(dst, qinst->src[0].index));
+
+ case QOP_ROT_MUL:
+ /* Rotation at the hardware level occurs on the inputs
+ * to the MUL unit, and they must be accumulators in
+ * order to have the time necessary to move things.
+ */
+ assert(src[0].mux <= QPU_MUX_R3);
+
+ queue(block,
+ qpu_m_rot(dst, src[0], qinst->src[1].index -
+ QPU_SMALL_IMM_MUL_ROT) | unpack);
+ set_last_cond_mul(block, qinst->cond);
+ handled_qinst_cond = true;
+ set_last_dst_pack(block, qinst);
break;
case QOP_MS_MASK: