aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alyssa Rosenzweig <[email protected]> 2019-10-31 14:56:45 -0400
committer Alyssa Rosenzweig <[email protected]> 2019-11-04 15:36:08 -0500
commit 762623381df3f6b9ae02b9c1c4528f867a93d425 (patch)
tree 87a20026cc05410177ced87426a097331453e956
parent bf5508f7b9698d3aa657b810febbf0e97e3b7b20 (diff)
pan/midgard: Extend swizzle packing for vec4/16-bit
We would like to pack not just xyzw swizzles but also efgh swizzles. This should work for vec4/16-bit. More work will be needed to pack swizzles for vec8/16-bit and even more work for 8-bit, of course.

Signed-off-by: Alyssa Rosenzweig <[email protected]>
-rw-r--r-- src/panfrost/midgard/midgard_emit.c 27
1 files changed, 24 insertions, 3 deletions
diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c
index f3f38c418d4..8bc4bb0e0dd 100644
--- a/src/panfrost/midgard/midgard_emit.c
+++ b/src/panfrost/midgard/midgard_emit.c
@@ -136,17 +136,38 @@ mir_pack_swizzle_alu(midgard_instruction *ins)
for (unsigned i = 0; i < 2; ++i) {
unsigned packed = 0;
- /* TODO: non-32-bit, non-vec4 */
+ /* For 32-bit, swizzle packing is stupid-simple. For 16-bit,
+ * the strategy is to check whether the nibble we're on is
+ * upper or lower. We need all components to be on the same
+ * "side"; that much is enforced by the ISA and should have
+ * been lowered. TODO: 8-bit/64-bit packing. TODO: vec8 */
+
+ unsigned first = ins->mask ? ffs(ins->mask) - 1 : 0;
+ bool upper = ins->swizzle[i][first] > 3;
+
+ if (upper && ins->mask)
+ assert(mir_srcsize(ins, i) <= midgard_reg_mode_16);
+
for (unsigned c = 0; c < 4; ++c) {
unsigned v = ins->swizzle[i][c];
- /* Check vec4 */
- assert(v <= 3);
+ bool t_upper = v > 3;
+
+ /* Ensure we're doing something sane */
+
+ if (ins->mask & (1 << c)) {
+ assert(t_upper == upper);
+ assert(v <= 7);
+ }
+
+ /* Use the non upper part */
+ v &= 0x3;
packed |= v << (2 * c);
}
src[i].swizzle = packed;
+ src[i].rep_high = upper;
}
ins->alu.src1 = vector_alu_srco_unsigned(src[0]);