From 24c5ab7bbbd2a4e9207c6cba66945f72ca5c7a3e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Sep 2014 14:38:24 -0700 Subject: vc4: Drop dependency on r3 for color packing. We can avoid it by carefully ordering the packing. This is important as a step in giving r3 to the register allocator. total instructions in shared programs: 56087 -> 55957 (-0.23%) instructions in affected programs: 18368 -> 18238 (-0.71%) --- src/gallium/drivers/vc4/vc4_qpu_emit.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/vc4') diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 3cb709f11fe..856f84444d5 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -296,17 +296,40 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; - case QOP_PACK_COLORS: + case QOP_PACK_COLORS: { + /* We have to be careful not to start writing over one + * of our source values when incrementally writing the + * destination. So, if the dst is one of the srcs, we + * pack that one first (and we pack 4 channels at once + * for the first pack). + */ + struct qpu_reg first_pack = src[0]; + for (int i = 0; i < 4; i++) { + if (src[i].mux == dst.mux && + src[i].addr == dst.addr) { + first_pack = dst; + break; + } + } + queue(c, qpu_m_MOV(dst, first_pack)); + *last_inst(c) |= QPU_PM; + *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888, + QPU_PACK); + for (int i = 0; i < 4; i++) { - queue(c, qpu_m_MOV(qpu_r3(), src[i])); + if (src[i].mux == first_pack.mux && + src[i].addr == first_pack.addr) { + continue; + } + + queue(c, qpu_m_MOV(dst, src[i])); *last_inst(c) |= QPU_PM; *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + i, QPU_PACK); } - queue(c, qpu_a_MOV(dst, qpu_r3())); - break; + } case QOP_FRAG_X: queue(c, qpu_a_ITOF(dst, -- cgit v1.2.3