summaryrefslogtreecommitdiffstats
path: root/src/intel/compiler
diff options
context:
space:
mode:
author: Francisco Jerez <[email protected]> 2019-01-18 11:38:17 -0800
committer: Francisco Jerez <[email protected]> 2019-02-21 14:07:25 -0800
commit: 7272fe9c0861065d6031e990e5f941c0036ebae8 (patch)
tree: 7445e8cdab3dd9db1028e4ae080b6babf5e6f670 /src/intel/compiler
parent: e03be78252afa8f1033b0824eff8d48df4fd6727 (diff)
intel/fs: Rely on undocumented unrestricted regioning for 32x16-bit integer multiply.
Even though the hardware spec claims that any "integer DWord multiply" operation is affected by the regioning restrictions of CHV/BXT/GLK, this is inconsistent with the behavior of the simulator and with empirical evidence. Return false from has_dst_aligned_region_restriction() for such instructions as a micro-optimization.

Tested-by: Anuj Phogat <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/intel/compiler')
-rw-r--r-- src/intel/compiler/brw_ir_fs.h 14
1 file changed, 11 insertions, 3 deletions
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index c4427a658b0..56a4bdc6e52 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -542,11 +542,19 @@ has_dst_aligned_region_restriction(const gen_device_info *devinfo,
const fs_inst *inst)
{
const brw_reg_type exec_type = get_exec_type(inst);
- const bool is_int_multiply = !brw_reg_type_is_floating_point(exec_type) &&
- (inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD);
+ /* Even though the hardware spec claims that "integer DWord multiply"
+ * operations are restricted, empirical evidence and the behavior of the
+ * simulator suggest that only 32x32-bit integer multiplication is
+ * restricted.
+ */
+ const bool is_dword_multiply = !brw_reg_type_is_floating_point(exec_type) &&
+ ((inst->opcode == BRW_OPCODE_MUL &&
+ MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) ||
+ (inst->opcode == BRW_OPCODE_MAD &&
+ MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 ||
- (type_sz(exec_type) == 4 && is_int_multiply))
+ (type_sz(exec_type) == 4 && is_dword_multiply))
return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo);
else
return false;