summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Juan A. Suarez Romero <[email protected]> 2016-08-03 11:51:44 +0000
committer: Francisco Jerez <[email protected]> 2017-04-14 14:56:07 -0700
commit: 3198ce3f96848856206e7b2e54a53024bcca7737 (patch)
tree: e5808af7f3fd115980b42c8d97ac8f572c20f03e
parent: 571cbd05ebfb8bef22277c5758afc82f5dd6a3f2 (diff)
i965/fs: fix lower SIMD width for IVB/BYT's MOV_INDIRECT
According to the IVB and HSW PRMs:

  "2. When the destination requires two registers and the sources are
   indirect, the sources must use 1x1 regioning mode."

So for DF instructions the execution size is not limited by the number of address registers that are available, but by the EU decompression logic not handling VxH indirect addressing correctly. This patch limits the SIMD width to 4 in this case.

v2: - Fix typo (Matt).
    - Fix condition (Curro)
v3: - Add spec quote (Curro)

Signed-off-by: Samuel Iglesias Gonsálvez <[email protected]>
Signed-off-by: Juan A. Suarez Romero <[email protected]>
Reviewed-by: Francisco Jerez <[email protected]>
-rw-r--r-- src/intel/compiler/brw_fs.cpp 17
1 files changed, 14 insertions, 3 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index f96e0a39899..cae15542fa1 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -4891,11 +4891,22 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
return MIN2(8, inst->exec_size);
- case SHADER_OPCODE_MOV_INDIRECT:
- /* Prior to Broadwell, we only have 8 address subregisters */
+ case SHADER_OPCODE_MOV_INDIRECT: {
+ /* From IVB and HSW PRMs:
+ *
+ * "2. When the destination requires two registers and the sources are
+ * indirect, the sources must use 1x1 regioning mode."
+ *
+ * In case of DF instructions in HSW/IVB, the exec_size is limited by
+ * the EU decompression logic not handling VxH indirect addressing
+ * correctly.
+ */
+ const unsigned max_size = (devinfo->gen >= 8 ? 2 : 1) * REG_SIZE;
+ /* Prior to Broadwell, we only have 8 address subregisters. */
return MIN3(devinfo->gen >= 8 ? 16 : 8,
- 2 * REG_SIZE / (inst->dst.stride * type_sz(inst->dst.type)),
+ max_size / (inst->dst.stride * type_sz(inst->dst.type)),
inst->exec_size);
+ }
case SHADER_OPCODE_LOAD_PAYLOAD: {
const unsigned reg_count =