diff options
author | Jason Ekstrand <[email protected]> | 2018-11-15 21:05:08 -0600 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2019-01-29 18:43:55 +0000 |
commit | eab1c55590b15260d6e1ceb65f96661a5e42ad00 (patch) | |
tree | 8aece8de5d533d70ad8cd95248e99457c4c70643 | |
parent | cca199fd85b6181902fa878cf3e2f4dacf01f1c6 (diff) |
intel/fs: Support SENDS in SHADER_OPCODE_SEND
Reviewed-by: Iago Toral Quiroga <[email protected]>
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 50 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.h | 1 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_generator.cpp | 23 |
3 files changed, 66 insertions, 8 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 9ecabd58763..d82572a5b70 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6759,12 +6759,62 @@ fs_visitor::optimize()
       OPT(lower_simd_width);
    }
 
+   OPT(fixup_sends_duplicate_payload);
+
    lower_uniform_pull_constant_loads();
 
    validate();
 }
 
 /**
+ * From the Skylake PRM Vol. 2a docs for sends:
+ *
+ *    "It is required that the second block of GRFs does not overlap with the
+ *    first block."
+ *
+ * There are plenty of cases where we may accidentally violate this due to
+ * having, for instance, both sources be the constant 0. This little pass
+ * just adds a new vgrf for the second payload and copies it over.
+ */
+bool
+fs_visitor::fixup_sends_duplicate_payload()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
+      if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
+          regions_overlap(inst->src[2], inst->mlen * REG_SIZE,
+                          inst->src[3], inst->ex_mlen * REG_SIZE)) {
+         fs_reg tmp = fs_reg(VGRF, alloc.allocate(inst->ex_mlen),
+                             BRW_REGISTER_TYPE_UD);
+         /* Sadly, we've lost all notion of channels and bit sizes at this
+          * point. Just WE_all it.
+          */
+         const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0);
+         fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD);
+         fs_reg copy_dst = tmp;
+         for (unsigned i = 0; i < inst->ex_mlen; i += 2) {
+            if (inst->ex_mlen == i + 1) {
+               /* Only one register left; do SIMD8 */
+               ibld.group(8, 0).MOV(copy_dst, copy_src);
+            } else {
+               ibld.MOV(copy_dst, copy_src);
+            }
+            copy_src = offset(copy_src, ibld, 1);
+            copy_dst = offset(copy_dst, ibld, 1);
+         }
+         inst->src[3] = tmp;
+         progress = true;
+      }
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
+/**
  * Three source instruction must have a GRF/MRF destination register.
  * ARF NULL is not allowed. Fix that up by allocating a temporary GRF.
  */
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 6467b4c6d95..5361b768003 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -103,6 +103,7 @@ public:
    void setup_vs_payload();
    void setup_gs_payload();
    void setup_cs_payload();
+   bool fixup_sends_duplicate_payload();
    void fixup_3src_null_dest();
    void assign_curb_setup();
    void calculate_urb_setup();
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 544d19826f4..e3b68fa3165 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -258,11 +258,6 @@ fs_generator::generate_send(fs_inst *inst,
                              struct brw_reg payload,
                              struct brw_reg payload2)
 {
-   /* SENDS not yet supported */
-   assert(ex_desc.file == BRW_IMMEDIATE_VALUE && ex_desc.d == 0);
-   assert(payload2.file == BRW_ARCHITECTURE_REGISTER_FILE &&
-          payload2.nr == BRW_ARF_NULL);
-
    const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE &&
                             dst.nr == BRW_ARF_NULL;
    const unsigned rlen = dst_is_null ? 0 : inst->size_written / REG_SIZE;
@@ -270,11 +265,23 @@ fs_generator::generate_send(fs_inst *inst,
    uint32_t desc_imm = inst->desc |
       brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size);
 
-   brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+   uint32_t ex_desc_imm = brw_message_ex_desc(devinfo, inst->ex_mlen);
+
+   if (ex_desc.file != BRW_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm) {
+      /* If we have any sort of extended descriptor, then we need SENDS. This
+       * also covers the dual-payload case because ex_mlen goes in ex_desc.
+       */
+      brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
+                                      desc, desc_imm, ex_desc, ex_desc_imm);
+      if (inst->check_tdr)
+         brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDSC);
+   } else {
+      brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+      if (inst->check_tdr)
+         brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
+   }
 
    brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot);
-   if (inst->check_tdr)
-      brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
 }
 
 void