summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/intel/compiler/brw_fs_lower_conversions.cpp 24
-rw-r--r-- src/intel/compiler/brw_fs_nir.cpp 14
2 files changed, 26 insertions, 12 deletions
diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp b/src/intel/compiler/brw_fs_lower_conversions.cpp
index 663c9674c49..e27e2402746 100644
--- a/src/intel/compiler/brw_fs_lower_conversions.cpp
+++ b/src/intel/compiler/brw_fs_lower_conversions.cpp
@@ -43,6 +43,24 @@ supports_type_conversion(const fs_inst *inst) {
}
}
+/* From the SKL PRM Vol 2a, "Move":
+ *
+ * "A mov with the same source and destination type, no source modifier,
+ * and no saturation is a raw move. A packed byte destination region (B
+ * or UB type with HorzStride == 1 and ExecSize > 1) can only be written
+ * using raw move."
+ */
+static bool
+is_byte_raw_mov (const fs_inst *inst)
+{ /* Returns true when inst is a MOV to a size-1 type that satisfies the raw-move conditions quoted above. */
+ return type_sz(inst->dst.type) == 1 && /* size-1 destination type (presumably B or UB; confirm type_sz units) */
+ inst->opcode == BRW_OPCODE_MOV && /* only MOV qualifies */
+ inst->src[0].type == inst->dst.type && /* "same source and destination type" */
+ !inst->saturate && /* "no saturation" */
+ !inst->src[0].negate && /* "no source modifier": no negate ... */
+ !inst->src[0].abs; /* ... and no abs */
+} /* NOTE(review): destination stride and exec size are not tested, so non-packed byte MOVs also match. */
+
bool
fs_visitor::lower_conversions()
{
@@ -54,7 +72,8 @@ fs_visitor::lower_conversions()
bool saturate = inst->saturate;
if (supports_type_conversion(inst)) {
- if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
+ if (type_sz(inst->dst.type) < get_exec_type_size(inst) &&
+ !is_byte_raw_mov(inst)) {
/* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
* Single Precision Float":
*
@@ -64,6 +83,9 @@ fs_visitor::lower_conversions()
* So we need to allocate a temporary that's two registers, and then do
* a strided MOV to get the lower DWord of every Qword that has the
* result.
+ *
+ * This restriction applies, in general, whenever we convert to
+ * a type with a smaller bit-size.
*/
fs_reg temp = ibld.vgrf(get_exec_type(inst));
fs_reg strided_temp = subscript(temp, dst.type, 0);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index c7f7bc21b8a..1ce89520bf1 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -755,19 +755,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
*/
case nir_op_f2f16_undef:
- case nir_op_i2i16:
- case nir_op_u2u16: {
- /* TODO: Fixing aligment rules for conversions from 32-bits to
- * 16-bit types should be moved to lower_conversions
- */
- fs_reg tmp = bld.vgrf(op[0].type, 1);
- tmp = subscript(tmp, result.type, 0);
- inst = bld.MOV(tmp, op[0]);
- inst->saturate = instr->dest.saturate;
- inst = bld.MOV(result, tmp);
+ inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
- }
case nir_op_f2f64:
case nir_op_f2i64:
@@ -807,6 +797,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_f2u16:
case nir_op_i2i32:
case nir_op_u2u32:
+ case nir_op_i2i16:
+ case nir_op_u2u16:
case nir_op_i2f16:
case nir_op_u2f16:
inst = bld.MOV(result, op[0]);