i965/vec4: translate 64-bit swizzles to 32-bit

The hardware can only operate with 32-bit swizzles, which is a rather limiting restriction. However, the idea is not to expose this to the optimization passes, which would be a mess to deal with. Instead, we let the bulk of the vec4 backend ignore this fact and we fix the swizzles right at codegen time. At the moment the pass only needs to handle single value swizzles thanks to the scalarization pass that runs before it. Notice that this only works for X/Y swizzles. We will add support for Z/W swizzles in the next patch, since they need a bit more work. v2 (Sam): - Do not expand swizzle of 64-bit immediate values. v3: - Do this after translation to hardware registers instead of doing it right before so we don't need the force_vstride0 flag (Curro). - Squashed patch that included FIXED_GRF in the list of register files that need this translation (Iago). - Remove swizzle assignments for VGRF and UNIFORM files in convert_to_hw_regs(), they will be set by apply_logical_swizzle() (Iago). Signed-off-by: Samuel Iglesias Gonsálvez <[email protected]> Reviewed-by: Matt Turner <[email protected]>
author: Iago Toral Quiroga <[email protected]> 2016-05-24 11:01:27 +0200
committer: Samuel Iglesias Gonsálvez <[email protected]> 2017-01-03 11:26:51 +0100
commit: e238601a2da9512c0fd263e8378f30498a0a1507 (patch)
tree: bb9232a19b2b8e292dc960e67907d71ec22985f8 /src/mesa/drivers
parent: fb7cb853c964db44ab99c1592e1ef7dec2f0c25b (diff)
2 files changed, 48 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 35171ceb5e7..6d73bb2faec 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1891,7 +1891,6 @@ vec4_visitor::convert_to_hw_regs()
             const unsigned width = REG_SIZE / 2 / MAX2(4, type_size);
             reg = byte_offset(brw_vecn_grf(width, src.nr, 0), src.offset);
             reg.type = src.type;
-            reg.swizzle = src.swizzle;
             reg.abs = src.abs;
             reg.negate = src.negate;
             break;
@@ -1905,7 +1904,6 @@ vec4_visitor::convert_to_hw_regs()
                                      src.offset),
                          0, width, 1);
             reg.type = src.type;
-            reg.swizzle = src.swizzle;
             reg.abs = src.abs;
             reg.negate = src.negate;
 
@@ -1914,8 +1912,13 @@ vec4_visitor::convert_to_hw_regs()
             break;
          }
 
-         case ARF:
          case FIXED_GRF:
+            if (type_sz(src.type) == 8) {
+               reg = src.as_brw_reg();
+               break;
+            }
+            /* fallthrough */
+         case ARF:
          case IMM:
             continue;
 
@@ -1929,6 +1932,7 @@ vec4_visitor::convert_to_hw_regs()
             unreachable("not reached");
          }
 
+         apply_logical_swizzle(&reg, inst, i);
          src = reg;
       }
 
@@ -2229,6 +2233,45 @@ vec4_visitor::scalarize_df()
    return progress;
 }
 
+/* The align16 hardware can only do 32-bit swizzle channels, so we need to
+ * translate the logical 64-bit swizzle channels that we use in the Vec4 IR
+ * to 32-bit swizzle channels in hardware registers.
+ *
+ * @inst and @arg identify the original vec4 IR source operand we need to
+ * translate the swizzle for and @hw_reg is the hardware register where we
+ * will write the hardware swizzle to use.
+ *
+ * This pass assumes that Align16/DF instructions have been fully scalarized
+ * previously so there is just one 64-bit swizzle channel to deal with for any
+ * given Vec4 IR source.
+ */
+void
+vec4_visitor::apply_logical_swizzle(struct brw_reg *hw_reg,
+                                    vec4_instruction *inst, int arg)
+{
+   src_reg reg = inst->src[arg];
+
+   if (reg.file == BAD_FILE || reg.file == BRW_IMMEDIATE_VALUE)
+      return;
+
+   /* If this is not a 64-bit operand or this is a scalar instruction we don't
+    * need to do anything about the swizzles.
+    */
+   if(type_sz(reg.type) < 8 || is_align1_df(inst)) {
+      hw_reg->swizzle = reg.swizzle;
+      return;
+   }
+
+   /* Otherwise we should have scalarized the instruction, so take the single
+    * 64-bit logical swizzle channel and translate it to 32-bit
+    */
+   assert(brw_is_single_value_swizzle(reg.swizzle));
+
+   unsigned swizzle = BRW_GET_SWZ(reg.swizzle, 0);
+   hw_reg->swizzle = BRW_SWIZZLE4(swizzle * 2, swizzle * 2 + 1,
+                                  swizzle * 2, swizzle * 2 + 1);
+}
+
 bool
 vec4_visitor::run()
 {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 253be677233..7b7d10c5c9c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -163,6 +163,8 @@ public:
 
    bool lower_simd_width();
    bool scalarize_df();
+   void apply_logical_swizzle(struct brw_reg *hw_reg,
+                              vec4_instruction *inst, int arg);
 
    vec4_instruction *emit(vec4_instruction *inst);
author	Iago Toral Quiroga <[email protected]>	2016-05-24 11:01:27 +0200
committer	Samuel Iglesias Gonsálvez <[email protected]>	2017-01-03 11:26:51 +0100
commit	e238601a2da9512c0fd263e8378f30498a0a1507 (patch)
tree	bb9232a19b2b8e292dc960e67907d71ec22985f8 /src/mesa/drivers
parent	fb7cb853c964db44ab99c1592e1ef7dec2f0c25b (diff)