diff options
author | Kenneth Graunke <[email protected]> | 2017-01-14 23:32:12 -0800 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2017-01-17 21:45:04 -0800 |
commit | 9919542f1cfff70524bc6117d19bf88e59159caa (patch) | |
tree | 9f1cca98e469b7afb4f2f311555da0461d450d6c /src/mesa/drivers | |
parent | 90bf39cd2b39874557a7c492d92b85945d45f3c6 (diff) |
i965: Make DCE set null destinations on messages with side effects.
(Co-authored by Matt Turner.)
Image atomics, for example, return a value - but the shader may not
want to use it. We assigned a useless VGRF destination. This seemed
harmless, but it can actually be quite harmful. The register allocator
has to assign that VGRF to a real register. It may assign the same
actual GRF to the destination of an instruction that follows soon after.
This results in a write-after-write (WAW) dependency, and stall.
A number of "Deus Ex: Mankind Divided" shaders use image atomics, but
don't use the return value. Several of these were hitting WAW stalls
for nearly 14,000 (poorly estimated) cycles a pop. Making dead code
elimination null out the destination avoids this issue.
This patch cuts one shader's estimated cycles by -98.39%! Removing the
message response should also help with data cluster bandwidth.
On Skylake:
(instruction counts remain identical)
total cycles in shared programs: 255413890 -> 248081010 (-2.87%)
cycles in affected programs: 12019948 -> 4687068 (-61.01%)
helped: 24
HURT: 10
v2: Make can_omit_write independent of can_eliminate (Curro).
Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Francisco Jerez <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 54 |
1 files changed, 41 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp index 0dd609121ca..7adb4278919 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp @@ -34,6 +34,42 @@ * yet in the tail end of this block. */ +/** + * Is it safe to eliminate the instruction? + */ +static bool +can_eliminate(const fs_inst *inst, BITSET_WORD *flag_live) +{ + return !inst->is_control_flow() && + !inst->has_side_effects() && + !(flag_live[0] & inst->flags_written()) && + !inst->writes_accumulator; +} + +/** + * Is it safe to omit the write, making the destination ARF null? + */ +static bool +can_omit_write(const fs_inst *inst) +{ + switch (inst->opcode) { + case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_TYPED_ATOMIC: + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + return true; + default: + /* We can eliminate the destination write for ordinary instructions, + * but not most SENDs. + */ + if (inst->opcode < 128 && inst->mlen == 0) + return true; + + /* It might not be safe for other virtual opcodes. */ + return false; + } +} + bool fs_visitor::dead_code_eliminate() { @@ -52,29 +88,21 @@ fs_visitor::dead_code_eliminate() sizeof(BITSET_WORD)); foreach_inst_in_block_reverse_safe(fs_inst, inst, block) { - if (inst->dst.file == VGRF && !inst->has_side_effects()) { + if (inst->dst.file == VGRF) { const unsigned var = live_intervals->var_from_reg(inst->dst); bool result_live = false; for (unsigned i = 0; i < regs_written(inst); i++) result_live |= BITSET_TEST(live, var + i); - if (!result_live) { + if (!result_live && + (can_omit_write(inst) || can_eliminate(inst, flag_live))) { + inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type)); progress = true; - - if (inst->writes_accumulator || inst->flags_written()) { - inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type)); - } else { - inst->opcode = BRW_OPCODE_NOP; - } } } - if (inst->dst.is_null() && - !inst->is_control_flow() && - !inst->has_side_effects() && - !(flag_live[0] & inst->flags_written()) && - !inst->writes_accumulator) { + if (inst->dst.is_null() && can_eliminate(inst, flag_live)) { inst->opcode = BRW_OPCODE_NOP; progress = true; } |