diff options
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 34 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 19 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qpu_emit.c | 50 |
4 files changed, 60 insertions, 49 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index bba02ca93f2..6bad1560b2f 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1843,32 +1843,22 @@ emit_frag_end(struct vc4_compile *c) qir_TLB_Z_WRITE(c, z); } - bool color_written = false; + struct qreg packed_color = c->undef; for (int i = 0; i < 4; i++) { - if (swizzled_outputs[i].file != QFILE_NULL) - color_written = true; - } - - struct qreg packed_color; - if (color_written) { - /* Fill in any undefined colors. The simulator will assertion - * fail if we read something that wasn't written, and I don't - * know what hardware does. - */ - for (int i = 0; i < 4; i++) { - if (swizzled_outputs[i].file == QFILE_NULL) - swizzled_outputs[i] = qir_uniform_f(c, 0.0); + if (swizzled_outputs[i].file == QFILE_NULL) + continue; + if (packed_color.file == QFILE_NULL) { + packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]); + } else { + packed_color = qir_PACK_8_F(c, + packed_color, + swizzled_outputs[i], + i); } - packed_color = qir_get_temp(c); - qir_emit(c, qir_inst4(QOP_PACK_COLORS, packed_color, - swizzled_outputs[0], - swizzled_outputs[1], - swizzled_outputs[2], - swizzled_outputs[3])); - } else { - packed_color = qir_uniform_ui(c, 0); } + if (packed_color.file == QFILE_NULL) + packed_color = qir_uniform_ui(c, 0); if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) { packed_color = vc4_logicop(c, packed_color, packed_dst_color); diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 3fd39413222..5f3b8ddc445 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -73,7 +73,11 @@ static const struct qir_op_info qir_op_info[] = { [QOP_RSQ] = { "rsq", 1, 1, false, true }, [QOP_EXP2] = { "exp2", 1, 2, false, true }, [QOP_LOG2] = { "log2", 1, 2, false, true }, - [QOP_PACK_COLORS] = { "pack_colors", 1, 4, false, true }, + [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1, false, true }, + [QOP_PACK_8A_F] = { "pack_8a_f", 1, 2, false, true }, + [QOP_PACK_8B_F] = { "pack_8b_f", 1, 2, false, true }, + [QOP_PACK_8C_F] = { "pack_8c_f", 1, 2, false, true }, + [QOP_PACK_8D_F] = { "pack_8d_f", 1, 2, false, true }, [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true }, [QOP_VPM_READ] = { "vpm_read", 0, 1, true }, [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true }, diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index f7d59a80dac..6dac00fbbd8 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -100,7 +100,11 @@ enum qop { QOP_VW_SETUP, QOP_VR_SETUP, QOP_PACK_SCALED, - QOP_PACK_COLORS, + QOP_PACK_8888_F, + QOP_PACK_8A_F, + QOP_PACK_8B_F, + QOP_PACK_8C_F, + QOP_PACK_8D_F, QOP_VPM_READ, QOP_TLB_DISCARD_SETUP, QOP_TLB_STENCIL_SETUP, @@ -473,6 +477,11 @@ QIR_ALU1(RSQ) QIR_ALU1(EXP2) QIR_ALU1(LOG2) QIR_ALU2(PACK_SCALED) +QIR_ALU1(PACK_8888_F) +QIR_ALU2(PACK_8A_F) +QIR_ALU2(PACK_8B_F) +QIR_ALU2(PACK_8C_F) +QIR_ALU2(PACK_8D_F) QIR_ALU1(VARY_ADD_C) QIR_NODST_2(TEX_S) QIR_NODST_2(TEX_T) @@ -539,6 +548,14 @@ qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) } static inline struct qreg +qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan) +{ + struct qreg t = qir_get_temp(c); + qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val)); + return t; +} + +static inline struct qreg qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) { return qir_EXP2(c, qir_FMUL(c, diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 503f32a4c05..857d56e0f44 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -347,40 +347,40 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; - case QOP_PACK_COLORS: { - /* We have to be careful not to start writing over one - * of our source values when incrementally writing the - * destination. So, if the dst is one of the srcs, we - * pack that one first (and we pack 4 channels at once - * for the first pack). - */ - struct qpu_reg first_pack = src[0]; - for (int i = 0; i < 4; i++) { - if (src[i].mux == dst.mux && - src[i].addr == dst.addr) { - first_pack = dst; - break; - } - } - queue(c, qpu_m_MOV(dst, first_pack)); + case QOP_PACK_8888_F: + queue(c, qpu_m_MOV(dst, src[0])); *last_inst(c) |= QPU_PM; *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888, QPU_PACK); + break; - for (int i = 0; i < 4; i++) { - if (src[i].mux == first_pack.mux && - src[i].addr == first_pack.addr) { - continue; + case QOP_PACK_8A_F: + case QOP_PACK_8B_F: + case QOP_PACK_8C_F: + case QOP_PACK_8D_F: + /* If dst doesn't happen to already contain src[0], + * then we have to move it in. + */ + if (qinst->src[0].file != QFILE_NULL && + (src[0].mux != dst.mux || src[0].addr != dst.addr)) { + /* Don't overwrite src1 while setting up + * the dst! + */ + if (dst.mux == src[1].mux && + dst.addr == src[1].addr) { + queue(c, qpu_m_MOV(qpu_rb(31), src[1])); + src[1] = qpu_rb(31); } - queue(c, qpu_m_MOV(dst, src[i])); - *last_inst(c) |= QPU_PM; - *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + i, - QPU_PACK); + queue(c, qpu_m_MOV(dst, src[0])); } + queue(c, qpu_m_MOV(dst, src[1])); + *last_inst(c) |= QPU_PM; + *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + + qinst->op - QOP_PACK_8A_F, + QPU_PACK); break; - } case QOP_FRAG_X: queue(c, qpu_a_ITOF(dst, |