summaryrefslogtreecommitdiffstats
path: root/src/intel/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/intel/compiler')
-rw-r--r--src/intel/compiler/brw_fs.cpp50
-rw-r--r--src/intel/compiler/brw_fs_builder.h15
2 files changed, 47 insertions, 18 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index d82572a5b70..8dd3b94fbd5 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5016,28 +5016,42 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
* Gen11+ the header has been removed so we can only use predication.
*/
const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0;
- const unsigned sz = header_sz + addr_sz + src_sz;
-
- /* Allocate space for the payload. */
- fs_reg *const components = new fs_reg[sz];
- const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
- unsigned n = 0;
const bool has_side_effects = inst->has_side_effects();
fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() :
fs_reg(brw_imm_d(0xffff));
- /* Construct the payload. */
- if (header_sz)
- components[n++] = emit_surface_header(bld, sample_mask);
+ fs_reg payload, payload2;
+ unsigned mlen, ex_mlen = 0;
+ if (devinfo->gen >= 9) {
+ /* We have split sends on gen9 and above */
+ assert(header_sz == 0);
+ payload = bld.move_to_vgrf(addr, addr_sz);
+ payload2 = bld.move_to_vgrf(src, src_sz);
+ mlen = addr_sz * (inst->exec_size / 8);
+ ex_mlen = src_sz * (inst->exec_size / 8);
+ } else {
+ /* Allocate space for the payload. */
+ const unsigned sz = header_sz + addr_sz + src_sz;
+ payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
+ fs_reg *const components = new fs_reg[sz];
+ unsigned n = 0;
+
+ /* Construct the payload. */
+ if (header_sz)
+ components[n++] = emit_surface_header(bld, sample_mask);
+
+ for (unsigned i = 0; i < addr_sz; i++)
+ components[n++] = offset(addr, bld, i);
- for (unsigned i = 0; i < addr_sz; i++)
- components[n++] = offset(addr, bld, i);
+ for (unsigned i = 0; i < src_sz; i++)
+ components[n++] = offset(src, bld, i);
- for (unsigned i = 0; i < src_sz; i++)
- components[n++] = offset(src, bld, i);
+ bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
+ mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
- bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
+ delete[] components;
+ }
/* Predicate the instruction on the sample mask if no header is
* provided.
@@ -5162,7 +5176,8 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
/* Update the original instruction. */
inst->opcode = SHADER_OPCODE_SEND;
- inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
+ inst->mlen = mlen;
+ inst->ex_mlen = ex_mlen;
inst->header_size = header_sz;
inst->send_has_side_effects = has_side_effects;
inst->send_is_volatile = !has_side_effects;
@@ -5183,10 +5198,9 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
/* Finally, the payload */
inst->src[2] = payload;
+ inst->src[3] = payload2;
- inst->resize_sources(3);
-
- delete[] components;
+ inst->resize_sources(4);
}
static void
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index c50af4c1f55..3fc6e5f2c25 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -426,6 +426,21 @@ namespace brw {
return src_reg(component(dst, 0));
}
+ src_reg
+ move_to_vgrf(const src_reg &src, unsigned num_components) const
+ {
+ src_reg *const src_comps = new src_reg[num_components];
+ for (unsigned i = 0; i < num_components; i++)
+ src_comps[i] = offset(src, dispatch_width(), i);
+
+ const dst_reg dst = vgrf(src.type, num_components);
+ LOAD_PAYLOAD(dst, src_comps, num_components, 0);
+
+ delete[] src_comps;
+
+ return src_reg(dst);
+ }
+
void
emit_scan(enum opcode opcode, const dst_reg &tmp,
unsigned cluster_size, brw_conditional_mod mod) const