diff options
author | Eric Anholt <eric@anholt.net> | 2014-09-07 14:38:24 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2014-12-08 16:08:13 -0800 |
commit | 24c5ab7bbbd2a4e9207c6cba66945f72ca5c7a3e (patch) | |
tree | 280eb33d6ea30905341e7bdd562b8c3166cc61e3 /src/gallium | |
parent | dfbf58c439870d46abcc8868b8ca145318aee125 (diff) |
vc4: Drop dependency on r3 for color packing.
We can avoid it by carefully ordering the packing. This is important as a
step in giving r3 to the register allocator.
total instructions in shared programs: 56087 -> 55957 (-0.23%)
instructions in affected programs: 18368 -> 18238 (-0.71%)
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qpu_emit.c | 31 |
1 files changed, 27 insertions, 4 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 3cb709f11fe..856f84444d5 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -296,17 +296,40 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
 
                         break;
 
-                case QOP_PACK_COLORS:
+                case QOP_PACK_COLORS: {
+                        /* We have to be careful not to start writing over one
+                         * of our source values when incrementally writing the
+                         * destination. So, if the dst is one of the srcs, we
+                         * pack that one first (and we pack 4 channels at once
+                         * for the first pack).
+                         */
+                        struct qpu_reg first_pack = src[0];
+                        for (int i = 0; i < 4; i++) {
+                                if (src[i].mux == dst.mux &&
+                                    src[i].addr == dst.addr) {
+                                        first_pack = dst;
+                                        break;
+                                }
+                        }
+                        queue(c, qpu_m_MOV(dst, first_pack));
+                        *last_inst(c) |= QPU_PM;
+                        *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
+                                                       QPU_PACK);
+
                         for (int i = 0; i < 4; i++) {
-                                queue(c, qpu_m_MOV(qpu_r3(), src[i]));
+                                if (src[i].mux == first_pack.mux &&
+                                    src[i].addr == first_pack.addr) {
+                                        continue;
+                                }
+
+                                queue(c, qpu_m_MOV(dst, src[i]));
                                 *last_inst(c) |= QPU_PM;
                                 *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + i,
                                                                QPU_PACK);
                         }
 
-                        queue(c, qpu_a_MOV(dst, qpu_r3()));
-
                         break;
+                }
 
                 case QOP_FRAG_X:
                         queue(c, qpu_a_ITOF(dst,