2 files changed, 48 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 35171ceb5e7..6d73bb2faec 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1891,7 +1891,6 @@ vec4_visitor::convert_to_hw_regs()
             const unsigned width = REG_SIZE / 2 / MAX2(4, type_size);
             reg = byte_offset(brw_vecn_grf(width, src.nr, 0), src.offset);
             reg.type = src.type;
-            reg.swizzle = src.swizzle;
             reg.abs = src.abs;
             reg.negate = src.negate;
             break;
@@ -1905,7 +1904,6 @@ vec4_visitor::convert_to_hw_regs()
                                      src.offset),
                          0, width, 1);
             reg.type = src.type;
-            reg.swizzle = src.swizzle;
             reg.abs = src.abs;
             reg.negate = src.negate;
 
@@ -1914,8 +1912,13 @@ vec4_visitor::convert_to_hw_regs()
             break;
          }
 
-         case ARF:
          case FIXED_GRF:
+            if (type_sz(src.type) == 8) {
+               reg = src.as_brw_reg();
+               break;
+            }
+            /* fallthrough */
+         case ARF:
          case IMM:
             continue;
 
@@ -1929,6 +1932,7 @@ vec4_visitor::convert_to_hw_regs()
             unreachable("not reached");
          }
 
+         apply_logical_swizzle(&reg, inst, i);
          src = reg;
       }
 
@@ -2229,6 +2233,45 @@ vec4_visitor::scalarize_df()
    return progress;
 }
 
+/* The align16 hardware can only do 32-bit swizzle channels, so we need to
+ * translate the logical 64-bit swizzle channels that we use in the Vec4 IR
+ * to 32-bit swizzle channels in hardware registers.
+ *
+ * @inst and @arg identify the original vec4 IR source operand we need to
+ * translate the swizzle for and @hw_reg is the hardware register where we
+ * will write the hardware swizzle to use.
+ *
+ * This pass assumes that Align16/DF instructions have been fully scalarized
+ * previously so there is just one 64-bit swizzle channel to deal with for any
+ * given Vec4 IR source.
+ */
+void
+vec4_visitor::apply_logical_swizzle(struct brw_reg *hw_reg,
+                                    vec4_instruction *inst, int arg)
+{
+   src_reg reg = inst->src[arg];
+
+   if (reg.file == BAD_FILE || reg.file == BRW_IMMEDIATE_VALUE)
+      return;
+
+   /* If this is not a 64-bit operand or this is a scalar instruction we don't
+    * need to do anything about the swizzles.
+    */
+   if(type_sz(reg.type) < 8 || is_align1_df(inst)) {
+      hw_reg->swizzle = reg.swizzle;
+      return;
+   }
+
+   /* Otherwise we should have scalarized the instruction, so take the single
+    * 64-bit logical swizzle channel and translate it to 32-bit
+    */
+   assert(brw_is_single_value_swizzle(reg.swizzle));
+
+   unsigned swizzle = BRW_GET_SWZ(reg.swizzle, 0);
+   hw_reg->swizzle = BRW_SWIZZLE4(swizzle * 2, swizzle * 2 + 1,
+                                  swizzle * 2, swizzle * 2 + 1);
+}
+
 bool
 vec4_visitor::run()
 {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 253be677233..7b7d10c5c9c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -163,6 +163,8 @@ public:
 
    bool lower_simd_width();
    bool scalarize_df();
+   void apply_logical_swizzle(struct brw_reg *hw_reg,
+                              vec4_instruction *inst, int arg);
 
    vec4_instruction *emit(vec4_instruction *inst);