diff options
author | Jason Ekstrand <[email protected]> | 2017-10-17 14:45:12 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2017-11-07 10:37:52 -0800 |
commit | 6041a31e77680597614776e59edb12709ec2e019 (patch) | |
tree | 25bbe71e47fb2939934d71bcac1d5e2ee66e3a49 | |
parent | 10e4feed39120072f38274b95e884422f72f360f (diff) |
intel/eu: Fix broadcast instruction for 64-bit values on little-core
We're not using broadcast for any 64-bit types right now since we mostly
use it for emit_uniformize on 32-bit buffer indices. However, SPIR-V
subgroups are going to need it for 64-bit values, so let's make it work.
Reviewed-by: Iago Toral Quiroga <[email protected]>
-rw-r--r-- | src/intel/compiler/brw_eu_emit.c | 26 |
1 files changed, 24 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index a18cfa4239f..fae74cf80ab 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3430,8 +3430,30 @@ brw_broadcast(struct brw_codegen *p, brw_pop_insn_state(p); /* Use indirect addressing to fetch the specified component. */ - brw_MOV(p, dst, - retype(brw_vec1_indirect(addr.subnr, offset), src.type)); + if (type_sz(src.type) > 4 && + (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { + /* From the Cherryview PRM Vol 7. "Register Region Restrictions": + * + * "When source or destination datatype is 64b or operation is + * integer DWord multiply, indirect addressing must not be + * used." + * + * To work around both of this issue, we do two integer MOVs + * insead of one 64-bit MOV. Because no double value should ever + * cross a register boundary, it's safe to use the immediate + * offset in the indirect here to handle adding 4 bytes to the + * offset and avoid the extra ADD to the register file. + */ + brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0), + retype(brw_vec1_indirect(addr.subnr, offset), + BRW_REGISTER_TYPE_D)); + brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1), + retype(brw_vec1_indirect(addr.subnr, offset + 4), + BRW_REGISTER_TYPE_D)); + } else { + brw_MOV(p, dst, + retype(brw_vec1_indirect(addr.subnr, offset), src.type)); + } } else { /* In SIMD4x2 mode the index can be either zero or one, replicate it * to all bits of a flag register, |