i965/compiler: handle conversion to smaller type in the lowering pass for that

This rollbacks the revert of this same patch introduced in commit 7b9c15628aae8729118b648f5f473e6ac926b99b. And also squahes the following patch to prevent a piglit regression caused by this change: intel/compiler: Fix lower_conversions for 8-bit types. Author: Jose Maria Casanova Crespo <[email protected]> For 8-bit types the execution type is word. A byte raw MOV has 16-bit execution type and 8-bit destination and it shouldn't be considered a conversion case. So there is no need to change alignment and enter in lower_conversions for these instructions. Fixes a regresion in the piglit test "glsl-fs-shader-stencil-export" that is introduced with this patch from the Vulkan shaderInt16 series: 'i965/compiler: handle conversion to smaller type in the lowering pass for that'. The problem is caused because there is already a case in the driver that injects Byte instructions like this: mov(8) g127<1>UB g2<32,8,4>UB And the aforementioned pass was not accounting for the special handling of the execution size of Byte instructions. This patch fixes this. v2: (Jason Ekstrand) - Simplify is_byte_raw_mov, include reference to PRM and not consider B <-> UB conversions as raw movs. v3: (Matt Turner) - Indentation style fixes. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106393 Tested-by: Mark Janes <[email protected]> Reviewed-by: Matt Turner <[email protected]> Reviewed-by: Jason Ekstrand <[email protected]>
author: Iago Toral Quiroga <[email protected]> 2018-05-04 11:33:07 +0200
committer: Iago Toral Quiroga <[email protected]> 2018-05-05 12:41:02 +0200
commit: 5a12bdac09496e00b746421f4c10d77f9b7a8e66 (patch)
tree: a63ad8da548e3b7de1003e0529771c8f1b1aafd3
parent: a75f967388b3224fb4da02f5bd01916018b639d4 (diff)
2 files changed, 26 insertions, 12 deletions
diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp b/src/intel/compiler/brw_fs_lower_conversions.cpp
index 663c9674c49..e27e2402746 100644
--- a/src/intel/compiler/brw_fs_lower_conversions.cpp
+++ b/src/intel/compiler/brw_fs_lower_conversions.cpp
@@ -43,6 +43,24 @@ supports_type_conversion(const fs_inst *inst) {
    }
 }
 
+/* From the SKL PRM Vol 2a, "Move":
+ *
+ *    "A mov with the same source and destination type, no source modifier,
+ *     and no saturation is a raw move. A packed byte destination region (B
+ *     or UB type with HorzStride == 1 and ExecSize > 1) can only be written
+ *     using raw move."
+ */
+static bool
+is_byte_raw_mov (const fs_inst *inst)
+{
+   return type_sz(inst->dst.type) == 1 &&
+          inst->opcode == BRW_OPCODE_MOV &&
+          inst->src[0].type == inst->dst.type &&
+          !inst->saturate &&
+          !inst->src[0].negate &&
+          !inst->src[0].abs;
+}
+
 bool
 fs_visitor::lower_conversions()
 {
@@ -54,7 +72,8 @@ fs_visitor::lower_conversions()
       bool saturate = inst->saturate;
 
       if (supports_type_conversion(inst)) {
-         if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
+         if (type_sz(inst->dst.type) < get_exec_type_size(inst) &&
+             !is_byte_raw_mov(inst)) {
             /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
              * Single Precision Float":
              *
@@ -64,6 +83,9 @@ fs_visitor::lower_conversions()
              * So we need to allocate a temporary that's two registers, and then do
              * a strided MOV to get the lower DWord of every Qword that has the
              * result.
+             *
+             * This restriction applies, in general, whenever we convert to
+             * a type with a smaller bit-size.
              */
             fs_reg temp = ibld.vgrf(get_exec_type(inst));
             fs_reg strided_temp = subscript(temp, dst.type, 0);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index c7f7bc21b8a..1ce89520bf1 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -755,19 +755,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
        */
 
    case nir_op_f2f16_undef:
-   case nir_op_i2i16:
-   case nir_op_u2u16: {
-      /* TODO: Fixing aligment rules for conversions from 32-bits to
-       * 16-bit types should be moved to lower_conversions
-       */
-      fs_reg tmp = bld.vgrf(op[0].type, 1);
-      tmp = subscript(tmp, result.type, 0);
-      inst = bld.MOV(tmp, op[0]);
-      inst->saturate = instr->dest.saturate;
-      inst = bld.MOV(result, tmp);
+      inst = bld.MOV(result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
-   }
 
    case nir_op_f2f64:
    case nir_op_f2i64:
@@ -807,6 +797,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    case nir_op_f2u16:
    case nir_op_i2i32:
    case nir_op_u2u32:
+   case nir_op_i2i16:
+   case nir_op_u2u16:
    case nir_op_i2f16:
    case nir_op_u2f16:
       inst = bld.MOV(result, op[0]);
author	Iago Toral Quiroga <[email protected]>	2018-05-04 11:33:07 +0200
committer	Iago Toral Quiroga <[email protected]>	2018-05-05 12:41:02 +0200
commit	5a12bdac09496e00b746421f4c10d77f9b7a8e66 (patch)
tree	a63ad8da548e3b7de1003e0529771c8f1b1aafd3
parent	a75f967388b3224fb4da02f5bd01916018b639d4 (diff)