aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jason Ekstrand <[email protected]> 2018-11-15 21:05:08 -0600
committer: Jason Ekstrand <[email protected]> 2019-01-29 18:43:55 +0000
commit: eab1c55590b15260d6e1ceb65f96661a5e42ad00 (patch)
tree: 8aece8de5d533d70ad8cd95248e99457c4c70643
parent: cca199fd85b6181902fa878cf3e2f4dacf01f1c6 (diff)
intel/fs: Support SENDS in SHADER_OPCODE_SEND
Reviewed-by: Iago Toral Quiroga <[email protected]>
-rw-r--r-- src/intel/compiler/brw_fs.cpp 50
-rw-r--r-- src/intel/compiler/brw_fs.h 1
-rw-r--r-- src/intel/compiler/brw_fs_generator.cpp 23
3 files changed, 66 insertions, 8 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 9ecabd58763..d82572a5b70 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6759,12 +6759,62 @@ fs_visitor::optimize()
OPT(lower_simd_width);
}
+ OPT(fixup_sends_duplicate_payload);
+
lower_uniform_pull_constant_loads();
validate();
}
/**
+ * From the Skylake PRM Vol. 2a docs for sends:
+ *
+ * "It is required that the second block of GRFs does not overlap with the
+ * first block."
+ *
+ * There are plenty of cases where we may accidentally violate this due to
+ * having, for instance, both sources be the constant 0. This little pass
+ * just adds a new vgrf for the second payload and copies it over.
+ */
+bool
+fs_visitor::fixup_sends_duplicate_payload() /* returns true if any SEND was rewritten */
+{
+ bool progress = false; /* becomes true once at least one instruction is patched */
+
+ foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
+ if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
+ regions_overlap(inst->src[2], inst->mlen * REG_SIZE,
+ inst->src[3], inst->ex_mlen * REG_SIZE)) { /* src[2] (mlen) and src[3] (ex_mlen) ranges collide */
+ fs_reg tmp = fs_reg(VGRF, alloc.allocate(inst->ex_mlen), /* new VGRF that will hold a copy of src[3] */
+ BRW_REGISTER_TYPE_UD);
+ /* Sadly, we've lost all notion of channels and bit sizes at this
+ * point. Just WE_all it (hence exec_all() on the builder below).
+ */
+ const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0);
+ fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD); /* read the old payload as raw UD data */
+ fs_reg copy_dst = tmp;
+ for (unsigned i = 0; i < inst->ex_mlen; i += 2) { /* i counts payload registers, two per iteration */
+ if (inst->ex_mlen == i + 1) {
+ /* Odd ex_mlen: only one register left, so copy it with a SIMD8 MOV */
+ ibld.group(8, 0).MOV(copy_dst, copy_src);
+ } else {
+ ibld.MOV(copy_dst, copy_src); /* 16-wide MOV; presumably covers two registers of UD -- confirm */
+ }
+ copy_src = offset(copy_src, ibld, 1); /* step both cursors by one ibld-sized component */
+ copy_dst = offset(copy_dst, ibld, 1);
+ }
+ inst->src[3] = tmp; /* the SEND now takes its src[3] payload from the copy */
+ progress = true;
+ }
+ }
+
+ if (progress)
+ invalidate_live_intervals(); /* a VGRF was allocated and MOVs were emitted above */
+
+ return progress;
+}
+
+/**
* Three source instruction must have a GRF/MRF destination register.
* ARF NULL is not allowed. Fix that up by allocating a temporary GRF.
*/
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 6467b4c6d95..5361b768003 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -103,6 +103,7 @@ public:
void setup_vs_payload();
void setup_gs_payload();
void setup_cs_payload();
+ bool fixup_sends_duplicate_payload();
void fixup_3src_null_dest();
void assign_curb_setup();
void calculate_urb_setup();
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 544d19826f4..e3b68fa3165 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -258,11 +258,6 @@ fs_generator::generate_send(fs_inst *inst,
struct brw_reg payload,
struct brw_reg payload2)
{
- /* SENDS not yet supported */
- assert(ex_desc.file == BRW_IMMEDIATE_VALUE && ex_desc.d == 0);
- assert(payload2.file == BRW_ARCHITECTURE_REGISTER_FILE &&
- payload2.nr == BRW_ARF_NULL);
-
const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE &&
dst.nr == BRW_ARF_NULL;
const unsigned rlen = dst_is_null ? 0 : inst->size_written / REG_SIZE;
@@ -270,11 +265,23 @@ fs_generator::generate_send(fs_inst *inst,
uint32_t desc_imm = inst->desc |
brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size);
- brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+ uint32_t ex_desc_imm = brw_message_ex_desc(devinfo, inst->ex_mlen);
+
+ if (ex_desc.file != BRW_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm) {
+ /* If we have any sort of extended descriptor, then we need SENDS. This
+ * also covers the dual-payload case because ex_mlen goes in ex_desc.
+ */
+ brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
+ desc, desc_imm, ex_desc, ex_desc_imm);
+ if (inst->check_tdr)
+ brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDSC);
+ } else {
+ brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+ if (inst->check_tdr)
+ brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
+ }
brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot);
- if (inst->check_tdr)
- brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
}
void