summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2017-10-17 14:45:12 -0700
committerJason Ekstrand <[email protected]>2017-11-07 10:37:52 -0800
commit6041a31e77680597614776e59edb12709ec2e019 (patch)
tree25bbe71e47fb2939934d71bcac1d5e2ee66e3a49
parent10e4feed39120072f38274b95e884422f72f360f (diff)
intel/eu: Fix broadcast instruction for 64-bit values on little-core
We're not using broadcast for any 64-bit types right now since we mostly use it for emit_uniformize on 32-bit buffer indices. However, SPIR-V subgroups are going to need it for 64-bit so let's make it work. Reviewed-by: Iago Toral Quiroga <[email protected]>
-rw-r--r--src/intel/compiler/brw_eu_emit.c26
1 files changed, 24 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index a18cfa4239f..fae74cf80ab 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3430,8 +3430,30 @@ brw_broadcast(struct brw_codegen *p,
brw_pop_insn_state(p);
/* Use indirect addressing to fetch the specified component. */
- brw_MOV(p, dst,
- retype(brw_vec1_indirect(addr.subnr, offset), src.type));
+ if (type_sz(src.type) > 4 &&
+ (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+ /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
+ *
+ * "When source or destination datatype is 64b or operation is
+ * integer DWord multiply, indirect addressing must not be
+ * used."
+ *
* To work around this issue, we do two integer MOVs
* instead of one 64-bit MOV. Because no double value should ever
+ * cross a register boundary, it's safe to use the immediate
+ * offset in the indirect here to handle adding 4 bytes to the
+ * offset and avoid the extra ADD to the register file.
+ */
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
+ retype(brw_vec1_indirect(addr.subnr, offset),
+ BRW_REGISTER_TYPE_D));
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
+ retype(brw_vec1_indirect(addr.subnr, offset + 4),
+ BRW_REGISTER_TYPE_D));
+ } else {
+ brw_MOV(p, dst,
+ retype(brw_vec1_indirect(addr.subnr, offset), src.type));
+ }
} else {
/* In SIMD4x2 mode the index can be either zero or one, replicate it
* to all bits of a flag register,