i965/compiler: handle conversion to smaller type in the lowering pass for that

The lowering pass was specialized to act on 64-bit to 32-bit conversions only, but the implementation is valid for other cases.

Reviewed-by: Jason Ekstrand <[email protected]>
author: Iago Toral Quiroga <[email protected]> 2018-03-06 12:14:05 +0100
committer: Iago Toral Quiroga <[email protected]> 2018-05-03 11:40:25 +0200
commit: 96b51537908cd2aace85f54b437eeb72e6346b7e (patch)
tree: 75ca6422d59ee05e39e6d41c1b76d12d104493af /src/intel
parent: 5361a87ee73848d9f7fab0b715563b3d9de7f3df (diff)
2 files changed, 7 insertions, 12 deletions
diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp b/src/intel/compiler/brw_fs_lower_conversions.cpp
index 663c9674c49..f95b39d3e86 100644
--- a/src/intel/compiler/brw_fs_lower_conversions.cpp
+++ b/src/intel/compiler/brw_fs_lower_conversions.cpp
@@ -54,7 +54,7 @@ fs_visitor::lower_conversions()
       bool saturate = inst->saturate;
 
       if (supports_type_conversion(inst)) {
-         if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
+         if (type_sz(inst->dst.type) < get_exec_type_size(inst)) {
             /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
              * Single Precision Float":
              *
@@ -64,6 +64,9 @@ fs_visitor::lower_conversions()
              * So we need to allocate a temporary that's two registers, and then do
              * a strided MOV to get the lower DWord of every Qword that has the
              * result.
+             *
+             * This restriction applies, in general, whenever we convert to
+             * a type with a smaller bit-size.
              */
             fs_reg temp = ibld.vgrf(get_exec_type(inst));
             fs_reg strided_temp = subscript(temp, dst.type, 0);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index db120efab80..2f341931ff5 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -755,19 +755,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
        */
 
    case nir_op_f2f16_undef:
-   case nir_op_i2i16:
-   case nir_op_u2u16: {
-      /* TODO: Fixing aligment rules for conversions from 32-bits to
-       * 16-bit types should be moved to lower_conversions
-       */
-      fs_reg tmp = bld.vgrf(op[0].type, 1);
-      tmp = subscript(tmp, result.type, 0);
-      inst = bld.MOV(tmp, op[0]);
-      inst->saturate = instr->dest.saturate;
-      inst = bld.MOV(result, tmp);
+      inst = bld.MOV(result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
-   }
 
    case nir_op_f2f64:
    case nir_op_f2i64:
@@ -805,6 +795,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    case nir_op_f2u32:
    case nir_op_i2i32:
    case nir_op_u2u32:
+   case nir_op_i2i16:
+   case nir_op_u2u16:
       inst = bld.MOV(result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
author	Iago Toral Quiroga <[email protected]>	2018-03-06 12:14:05 +0100
committer	Iago Toral Quiroga <[email protected]>	2018-05-03 11:40:25 +0200
commit	96b51537908cd2aace85f54b437eeb72e6346b7e (patch)
tree	75ca6422d59ee05e39e6d41c1b76d12d104493af /src/intel
parent	5361a87ee73848d9f7fab0b715563b3d9de7f3df (diff)