summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 34
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 19
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c 50
4 files changed, 60 insertions, 49 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index bba02ca93f2..6bad1560b2f 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1843,32 +1843,22 @@ emit_frag_end(struct vc4_compile *c)
qir_TLB_Z_WRITE(c, z);
}
- bool color_written = false;
+ struct qreg packed_color = c->undef;
for (int i = 0; i < 4; i++) {
- if (swizzled_outputs[i].file != QFILE_NULL)
- color_written = true;
- }
-
- struct qreg packed_color;
- if (color_written) {
- /* Fill in any undefined colors. The simulator will assertion
- * fail if we read something that wasn't written, and I don't
- * know what hardware does.
- */
- for (int i = 0; i < 4; i++) {
- if (swizzled_outputs[i].file == QFILE_NULL)
- swizzled_outputs[i] = qir_uniform_f(c, 0.0);
+ if (swizzled_outputs[i].file == QFILE_NULL)
+ continue;
+ if (packed_color.file == QFILE_NULL) {
+ packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]);
+ } else {
+ packed_color = qir_PACK_8_F(c,
+ packed_color,
+ swizzled_outputs[i],
+ i);
}
- packed_color = qir_get_temp(c);
- qir_emit(c, qir_inst4(QOP_PACK_COLORS, packed_color,
- swizzled_outputs[0],
- swizzled_outputs[1],
- swizzled_outputs[2],
- swizzled_outputs[3]));
- } else {
- packed_color = qir_uniform_ui(c, 0);
}
+ if (packed_color.file == QFILE_NULL)
+ packed_color = qir_uniform_ui(c, 0);
if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
packed_color = vc4_logicop(c, packed_color, packed_dst_color);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 3fd39413222..5f3b8ddc445 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -73,7 +73,11 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_RSQ] = { "rsq", 1, 1, false, true },
[QOP_EXP2] = { "exp2", 1, 2, false, true },
[QOP_LOG2] = { "log2", 1, 2, false, true },
- [QOP_PACK_COLORS] = { "pack_colors", 1, 4, false, true },
+ [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1, false, true },
+ [QOP_PACK_8A_F] = { "pack_8a_f", 1, 2, false, true },
+ [QOP_PACK_8B_F] = { "pack_8b_f", 1, 2, false, true },
+ [QOP_PACK_8C_F] = { "pack_8c_f", 1, 2, false, true },
+ [QOP_PACK_8D_F] = { "pack_8d_f", 1, 2, false, true },
[QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
[QOP_VPM_READ] = { "vpm_read", 0, 1, true },
[QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index f7d59a80dac..6dac00fbbd8 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -100,7 +100,11 @@ enum qop {
QOP_VW_SETUP,
QOP_VR_SETUP,
QOP_PACK_SCALED,
- QOP_PACK_COLORS,
+ QOP_PACK_8888_F,
+ QOP_PACK_8A_F,
+ QOP_PACK_8B_F,
+ QOP_PACK_8C_F,
+ QOP_PACK_8D_F,
QOP_VPM_READ,
QOP_TLB_DISCARD_SETUP,
QOP_TLB_STENCIL_SETUP,
@@ -473,6 +477,11 @@ QIR_ALU1(RSQ)
QIR_ALU1(EXP2)
QIR_ALU1(LOG2)
QIR_ALU2(PACK_SCALED)
+QIR_ALU1(PACK_8888_F)
+QIR_ALU2(PACK_8A_F)
+QIR_ALU2(PACK_8B_F)
+QIR_ALU2(PACK_8C_F)
+QIR_ALU2(PACK_8D_F)
QIR_ALU1(VARY_ADD_C)
QIR_NODST_2(TEX_S)
QIR_NODST_2(TEX_T)
@@ -539,6 +548,14 @@ qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
}
static inline struct qreg
+qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan) /* Emits one QOP_PACK_8[A-D]_F instruction, selected by chan, with rest and val as its two sources, and returns the temp it was given. */
+{
+ struct qreg t = qir_get_temp(c); /* Obtained from qir_get_temp(); passed to qir_inst ahead of the sources (presumably the destination operand). */
+ qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val)); /* Opcode is QOP_PACK_8A_F offset by chan; relies on the 8A..8D entries of enum qop being consecutive. chan is not range-checked here. */
+ return t;
+}
+
+static inline struct qreg
qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
{
return qir_EXP2(c, qir_FMUL(c,
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 503f32a4c05..857d56e0f44 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -347,40 +347,40 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
- case QOP_PACK_COLORS: {
- /* We have to be careful not to start writing over one
- * of our source values when incrementally writing the
- * destination. So, if the dst is one of the srcs, we
- * pack that one first (and we pack 4 channels at once
- * for the first pack).
- */
- struct qpu_reg first_pack = src[0];
- for (int i = 0; i < 4; i++) {
- if (src[i].mux == dst.mux &&
- src[i].addr == dst.addr) {
- first_pack = dst;
- break;
- }
- }
- queue(c, qpu_m_MOV(dst, first_pack));
+ case QOP_PACK_8888_F:
+ queue(c, qpu_m_MOV(dst, src[0]));
*last_inst(c) |= QPU_PM;
*last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
QPU_PACK);
+ break;
- for (int i = 0; i < 4; i++) {
- if (src[i].mux == first_pack.mux &&
- src[i].addr == first_pack.addr) {
- continue;
+ case QOP_PACK_8A_F:
+ case QOP_PACK_8B_F:
+ case QOP_PACK_8C_F:
+ case QOP_PACK_8D_F:
+ /* If dst doesn't happen to already contain src[0],
+ * then we have to move it in.
+ */
+ if (qinst->src[0].file != QFILE_NULL &&
+ (src[0].mux != dst.mux || src[0].addr != dst.addr)) {
+ /* Don't overwrite src1 while setting up
+ * the dst!
+ */
+ if (dst.mux == src[1].mux &&
+ dst.addr == src[1].addr) {
+ queue(c, qpu_m_MOV(qpu_rb(31), src[1]));
+ src[1] = qpu_rb(31);
}
- queue(c, qpu_m_MOV(dst, src[i]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + i,
- QPU_PACK);
+ queue(c, qpu_m_MOV(dst, src[0]));
}
+ queue(c, qpu_m_MOV(dst, src[1]));
+ *last_inst(c) |= QPU_PM;
+ *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A +
+ qinst->op - QOP_PACK_8A_F,
+ QPU_PACK);
break;
- }
case QOP_FRAG_X:
queue(c, qpu_a_ITOF(dst,