summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Turner <[email protected]>2017-09-21 16:42:02 -0700
committerMatt Turner <[email protected]>2017-10-04 14:08:54 -0700
commitb541945c2027990ac571184bbf8e01285be0e33a (patch)
treeb36bebb4f41890fa8146cc0c3f4cc635c75009d0
parent2082c32950a9e3a4debc00b0d6da85404b923920 (diff)
i965/fs: Unpack count argument to 64-bit shift ops on Atom
64-bit operations on Atom parts have additional restrictions over their big-core counterparts (validated by later patches). Specifically, the restriction that "Source and Destination horizontal stride must be aligned to the same qword" is violated by most shift operations since NIR uses a 32-bit value as the shift count argument, and this causes instructions like shl(8) g19<1>Q g5<4,4,1>Q g23<4,4,1>UD where src1 has a 32-bit stride, but the dest and src0 have a 64-bit stride. This caused ~4 pixels in the ARB_shader_ballot piglit test fs-readInvocation-uint.shader_test to be incorrect. Unfortunately no ARB_gpu_shader_int64 test hit this case because they operate on uniforms, and their scalar regions are an exception to the restriction. We work around this by effectively unpacking the shift count, so that we can read it with a 64-bit stride in the shift instruction. Unfortunately the unpack (a MOV with a dst stride of 2) is a partial write, and cannot be copy-propagated or CSE'd. Bugzilla: https://bugs.freedesktop.org/101984
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp34
1 files changed, 28 insertions, 6 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index aa57bb9d786..7eae0520277 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1267,14 +1267,36 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
unreachable("not reached: should have been lowered");
case nir_op_ishl:
- bld.SHL(result, op[0], op[1]);
- break;
case nir_op_ishr:
- bld.ASR(result, op[0], op[1]);
- break;
- case nir_op_ushr:
- bld.SHR(result, op[0], op[1]);
+ case nir_op_ushr: {
+ fs_reg shift_count = op[1];
+
+ if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
+ if (op[1].file == VGRF &&
+ (result.type == BRW_REGISTER_TYPE_Q ||
+ result.type == BRW_REGISTER_TYPE_UQ)) {
+ shift_count = fs_reg(VGRF, alloc.allocate(dispatch_width / 4),
+ BRW_REGISTER_TYPE_UD);
+ shift_count.stride = 2;
+ bld.MOV(shift_count, op[1]);
+ }
+ }
+
+ switch (instr->op) {
+ case nir_op_ishl:
+ bld.SHL(result, op[0], shift_count);
+ break;
+ case nir_op_ishr:
+ bld.ASR(result, op[0], shift_count);
+ break;
+ case nir_op_ushr:
+ bld.SHR(result, op[0], shift_count);
+ break;
+ default:
+ unreachable("not reached");
+ }
break;
+ }
case nir_op_pack_half_2x16_split:
bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);