summary refs log tree commit diff stats
path: root/src/gallium
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2014-09-07 14:38:24 -0700
committer Eric Anholt <eric@anholt.net> 2014-12-08 16:08:13 -0800
commit 24c5ab7bbbd2a4e9207c6cba66945f72ca5c7a3e (patch)
tree 280eb33d6ea30905341e7bdd562b8c3166cc61e3 /src/gallium
parent dfbf58c439870d46abcc8868b8ca145318aee125 (diff)
vc4: Drop dependency on r3 for color packing.
We can avoid it by carefully ordering the packing. This is important as a step in giving r3 to the register allocator.

total instructions in shared programs: 56087 -> 55957 (-0.23%)
instructions in affected programs: 18368 -> 18238 (-0.71%)
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c 31
1 files changed, 27 insertions, 4 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 3cb709f11fe..856f84444d5 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -296,17 +296,40 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
- case QOP_PACK_COLORS:
+ case QOP_PACK_COLORS: {
+ /* We have to be careful not to start writing over one
+ * of our source values when incrementally writing the
+ * destination. So, if the dst is one of the srcs, we
+ * pack that one first (and we pack 4 channels at once
+ * for the first pack).
+ */
+ struct qpu_reg first_pack = src[0];
+ for (int i = 0; i < 4; i++) {
+ if (src[i].mux == dst.mux &&
+ src[i].addr == dst.addr) {
+ first_pack = dst;
+ break;
+ }
+ }
+ queue(c, qpu_m_MOV(dst, first_pack));
+ *last_inst(c) |= QPU_PM;
+ *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
+ QPU_PACK);
+
for (int i = 0; i < 4; i++) {
- queue(c, qpu_m_MOV(qpu_r3(), src[i]));
+ if (src[i].mux == first_pack.mux &&
+ src[i].addr == first_pack.addr) {
+ continue;
+ }
+
+ queue(c, qpu_m_MOV(dst, src[i]));
*last_inst(c) |= QPU_PM;
*last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + i,
QPU_PACK);
}
- queue(c, qpu_a_MOV(dst, qpu_r3()));
-
break;
+ }
case QOP_FRAG_X:
queue(c, qpu_a_ITOF(dst,