diff options
author | Matt Turner <[email protected]> | 2014-04-12 17:40:18 -0700 |
---|---|---|
committer | Matt Turner <[email protected]> | 2014-04-15 09:25:11 -0700 |
commit | f34f39330bb41fb0a86930908de10353193a841d (patch) | |
tree | 60124c3f1f1955c558b7cfcdc394d072c1ca960b /src | |
parent | 596737ee91cc199a8edff5dc440736471e28f297 (diff) |
i965/fs: Reimplement dead_code_elimination().
total instructions in shared programs: 1653399 -> 1651790 (-0.10%)
instructions in affected programs: 92157 -> 90548 (-1.75%)
GAINED: 2
LOST: 2
Also significantly reduces the number of optimization loop iterations:
total loop iterations in shared programs: 39724 -> 31651 (-20.32%)
loop iterations in affected programs: 21617 -> 13544 (-37.35%)
Including some great pathological cases, like 29 -> 3 in Strike Suit
Zero and 24 -> 3 in Dota2.
Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 57 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 115 |
3 files changed, 117 insertions, 56 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 8205fe9baa1..836c62b7e75 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -58,6 +58,7 @@ i965_FILES = \
 	brw_fs_channel_expressions.cpp \
 	brw_fs_copy_propagation.cpp \
 	brw_fs_cse.cpp \
+	brw_fs_dead_code_eliminate.cpp \
 	brw_fs_fp.cpp \
 	brw_fs_generator.cpp \
 	brw_fs_live_variables.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 85a5463e020..c723bf0ead4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2085,61 +2085,6 @@ fs_visitor::opt_algebraic()
    return progress;
 }
 
-/**
- * Removes any instructions writing a VGRF where that VGRF is not used by any
- * later instruction.
- */
-bool
-fs_visitor::dead_code_eliminate()
-{
-   bool progress = false;
-   int pc = 0;
-
-   calculate_live_intervals();
-
-   foreach_list_safe(node, &this->instructions) {
-      fs_inst *inst = (fs_inst *)node;
-
-      if (inst->dst.file == GRF && !inst->has_side_effects()) {
-         bool dead = true;
-
-         for (int i = 0; i < inst->regs_written; i++) {
-            int var = live_intervals->var_from_vgrf[inst->dst.reg];
-            assert(live_intervals->end[var + inst->dst.reg_offset + i] >= pc);
-            if (live_intervals->end[var + inst->dst.reg_offset + i] != pc) {
-               dead = false;
-               break;
-            }
-         }
-
-         if (dead) {
-            /* Don't dead code eliminate instructions that write to the
-             * accumulator as a side-effect. Instead just set the destination
-             * to the null register to free it.
-             */
-            switch (inst->opcode) {
-            case BRW_OPCODE_ADDC:
-            case BRW_OPCODE_SUBB:
-            case BRW_OPCODE_MACH:
-               inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
-               break;
-            default:
-               inst->remove();
-               progress = true;
-               break;
-            }
-         }
-      }
-
-      pc++;
-   }
-
-   if (progress)
-      invalidate_live_intervals();
-
-   return progress;
-}
-
 struct dead_code_hash_key
 {
    int vgrf;
@@ -3249,8 +3194,8 @@ fs_visitor::run()
          progress = opt_cse() || progress;
          progress = opt_copy_propagate() || progress;
          progress = opt_peephole_predicated_break() || progress;
-         progress = dead_code_eliminate() || progress;
          progress = dead_code_eliminate_local() || progress;
+         progress = dead_code_eliminate() || progress;
          progress = opt_peephole_sel() || progress;
          progress = dead_control_flow_eliminate(this) || progress;
          progress = opt_saturate_propagation() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
new file mode 100644
index 00000000000..390ac9ada5b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_fs.h"
+#include "brw_fs_live_variables.h"
+#include "brw_cfg.h"
+
+/** @file brw_fs_dead_code_eliminate.cpp
+ *
+ * Dataflow-aware dead code elimination.
+ *
+ * Walks the instruction list from the bottom, removing instructions that
+ * have results that both aren't used in later blocks and haven't been read
+ * yet in the tail end of this block.
+ */
+
+/** Removes instructions whose GRF results are never read; true on progress. */
+bool
+fs_visitor::dead_code_eliminate()
+{
+   bool progress = false;
+
+   cfg_t cfg(&instructions);
+   calculate_live_intervals();
+
+   int num_vars = live_intervals->num_vars;
+   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
+
+   for (int b = 0; b < cfg.num_blocks; b++) {
+      bblock_t *block = cfg.blocks[b];
+      /* Seed with the block's live-out set, then scan it bottom-up. */
+      memcpy(live, live_intervals->bd[b].liveout,
+             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
+
+      for (fs_inst *inst = (fs_inst *)block->end;
+           inst != block->start->prev;
+           inst = (fs_inst *)inst->prev) {
+         if (inst->dst.file == GRF &&
+             !inst->has_side_effects() &&
+             !inst->writes_flag()) {
+            /* The write may start at dst.reg_offset within the VGRF, so
+             * index from there, exactly as the source marking below does.
+             */
+            int var = live_intervals->var_from_vgrf[inst->dst.reg] +
+                      inst->dst.reg_offset;
+            bool result_live = false;
+            for (int i = 0; i < inst->regs_written; i++) {
+               result_live = result_live || BITSET_TEST(live, var + i);
+            }
+
+            if (!result_live) {
+               progress = true;
+               switch (inst->opcode) {
+               case BRW_OPCODE_ADDC:
+               case BRW_OPCODE_SUBB:
+               case BRW_OPCODE_MACH:
+                  /* These also write the accumulator; only free the dst. */
+                  inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
+                  break;
+               default:
+                  /* Mark for the sweep below; skip marking its sources. */
+                  inst->opcode = BRW_OPCODE_NOP;
+                  continue;
+               }
+            }
+         }
+
+         /* Every GRF register this instruction reads is live above it. */
+         for (int i = 0; i < 3; i++) {
+            if (inst->src[i].file == GRF) {
+               int var = live_intervals->var_from_vgrf[inst->src[i].reg];
+               for (int j = 0; j < inst->regs_read(this, i); j++) {
+                  BITSET_SET(live, var + inst->src[i].reg_offset + j);
+               }
+            }
+         }
+      }
+   }
+
+   ralloc_free(live);
+
+   if (progress) {
+      /* Remove all NOPs, which includes everything marked dead above. */
+      foreach_list_safe(node, &this->instructions) {
+         fs_inst *inst = (fs_inst *)node;
+         if (inst->opcode == BRW_OPCODE_NOP) {
+            inst->remove();
+         }
+      }
+
+      invalidate_live_intervals();
+   }
+
+   return progress;
+}