aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/intel/compiler/brw_eu_emit.c26
1 files changed, 24 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index a18cfa4239f..fae74cf80ab 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3430,8 +3430,30 @@ brw_broadcast(struct brw_codegen *p,
brw_pop_insn_state(p);
/* Use indirect addressing to fetch the specified component. */
- brw_MOV(p, dst,
- retype(brw_vec1_indirect(addr.subnr, offset), src.type));
+ if (type_sz(src.type) > 4 &&
+ (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+ /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
+ *
+ * "When source or destination datatype is 64b or operation is
+ * integer DWord multiply, indirect addressing must not be
+ * used."
+ *
+ * To work around both of this issue, we do two integer MOVs
+ * insead of one 64-bit MOV. Because no double value should ever
+ * cross a register boundary, it's safe to use the immediate
+ * offset in the indirect here to handle adding 4 bytes to the
+ * offset and avoid the extra ADD to the register file.
+ */
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
+ retype(brw_vec1_indirect(addr.subnr, offset),
+ BRW_REGISTER_TYPE_D));
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
+ retype(brw_vec1_indirect(addr.subnr, offset + 4),
+ BRW_REGISTER_TYPE_D));
+ } else {
+ brw_MOV(p, dst,
+ retype(brw_vec1_indirect(addr.subnr, offset), src.type));
+ }
} else {
/* In SIMD4x2 mode the index can be either zero or one, replicate it
* to all bits of a flag register,