summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2017-01-14 23:32:12 -0800
committerKenneth Graunke <[email protected]>2017-01-17 21:45:04 -0800
commit9919542f1cfff70524bc6117d19bf88e59159caa (patch)
tree9f1cca98e469b7afb4f2f311555da0461d450d6c
parent90bf39cd2b39874557a7c492d92b85945d45f3c6 (diff)
i965: Make DCE set null destinations on messages with side effects.
(Co-authored by Matt Turner.) Image atomics, for example, return a value - but the shader may not want to use it. We assigned a useless VGRF destination. This seemed harmless, but it can actually be quite harmful. The register allocator has to assign that VGRF to a real register. It may assign the same actual GRF to the destination of an instruction that follows soon after. This results in a write-after-write (WAW) dependency, and stall. A number of "Deus Ex: Mankind Divided" shaders use image atomics, but don't use the return value. Several of these were hitting WAW stalls for nearly 14,000 (poorly estimated) cycles a pop. Making dead code elimination null out the destination avoids this issue. This patch cuts one shader's estimated cycles by -98.39%! Removing the message response should also help with data cluster bandwidth. On Skylake: (instruction counts remain identical) total cycles in shared programs: 255413890 -> 248081010 (-2.87%) cycles in affected programs: 12019948 -> 4687068 (-61.01%) helped: 24 HURT: 10 v2: Make can_omit_write independent of can_eliminate (Curro). Signed-off-by: Kenneth Graunke <[email protected]> Reviewed-by: Francisco Jerez <[email protected]> Reviewed-by: Matt Turner <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp54
1 files changed, 41 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
index 0dd609121ca..7adb4278919 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -34,6 +34,42 @@
* yet in the tail end of this block.
*/
+/**
+ * Is it safe to eliminate the instruction?
+ */
+static bool
+can_eliminate(const fs_inst *inst, BITSET_WORD *flag_live)
+{
+ return !inst->is_control_flow() &&
+ !inst->has_side_effects() &&
+ !(flag_live[0] & inst->flags_written()) &&
+ !inst->writes_accumulator;
+}
+
+/**
+ * Is it safe to omit the write, making the destination ARF null?
+ */
+static bool
+can_omit_write(const fs_inst *inst)
+{
+ switch (inst->opcode) {
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+ return true;
+ default:
+ /* We can eliminate the destination write for ordinary instructions,
+ * but not most SENDs.
+ */
+ if (inst->opcode < 128 && inst->mlen == 0)
+ return true;
+
+ /* It might not be safe for other virtual opcodes. */
+ return false;
+ }
+}
+
bool
fs_visitor::dead_code_eliminate()
{
@@ -52,29 +88,21 @@ fs_visitor::dead_code_eliminate()
sizeof(BITSET_WORD));
foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
- if (inst->dst.file == VGRF && !inst->has_side_effects()) {
+ if (inst->dst.file == VGRF) {
const unsigned var = live_intervals->var_from_reg(inst->dst);
bool result_live = false;
for (unsigned i = 0; i < regs_written(inst); i++)
result_live |= BITSET_TEST(live, var + i);
- if (!result_live) {
+ if (!result_live &&
+ (can_omit_write(inst) || can_eliminate(inst, flag_live))) {
+ inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
progress = true;
-
- if (inst->writes_accumulator || inst->flags_written()) {
- inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
- } else {
- inst->opcode = BRW_OPCODE_NOP;
- }
}
}
- if (inst->dst.is_null() &&
- !inst->is_control_flow() &&
- !inst->has_side_effects() &&
- !(flag_live[0] & inst->flags_written()) &&
- !inst->writes_accumulator) {
+ if (inst->dst.is_null() && can_eliminate(inst, flag_live)) {
inst->opcode = BRW_OPCODE_NOP;
progress = true;
}