diff options
author | Rhys Perry <[email protected]> | 2020-07-10 16:31:31 +0100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-07-10 22:36:14 +0000 |
commit | 19ca34ed270e20ed9ba02e3ea780f057eca2bae1 (patch) | |
tree | 8056956fc17762ef7de4e43bd126384c2d438b3d /src/amd | |
parent | 9a089baff1af757b1c0f033f4bb16cb2c8864271 (diff) |
aco: don't create phis with undef operands in the boolean phi pass
We can create better merge code if we pass on undef.
fossil-db (Navi):
Totals from 1208 (0.89% of 135946) affected shaders:
SGPRs: 66864 -> 66200 (-0.99%); split: -1.04%, +0.05%
SpillSGPRs: 1179 -> 1156 (-1.95%)
CodeSize: 6516672 -> 6469564 (-0.72%); split: -0.76%, +0.04%
Instrs: 1232680 -> 1220859 (-0.96%); split: -0.97%, +0.01%
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3388>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/compiler/aco_lower_phis.cpp | 42 |
1 files changed, 28 insertions, 14 deletions
diff --git a/src/amd/compiler/aco_lower_phis.cpp b/src/amd/compiler/aco_lower_phis.cpp index ad0c620a69f..923ec8bdf72 100644 --- a/src/amd/compiler/aco_lower_phis.cpp +++ b/src/amd/compiler/aco_lower_phis.cpp @@ -44,7 +44,8 @@ struct ssa_state { unsigned phi_block_idx; unsigned loop_nest_depth; std::map<unsigned, unsigned> writes; - std::vector<unsigned> latest; + std::vector<Operand> latest; + std::vector<bool> visited; }; Operand get_ssa(Program *program, unsigned block_idx, ssa_state *state, bool before_write) @@ -53,39 +54,50 @@ Operand get_ssa(Program *program, unsigned block_idx, ssa_state *state, bool bef auto it = state->writes.find(block_idx); if (it != state->writes.end()) return Operand(Temp(it->second, program->lane_mask)); - if (state->latest[block_idx]) - return Operand(Temp(state->latest[block_idx], program->lane_mask)); + if (state->visited[block_idx]) + return state->latest[block_idx]; } + state->visited[block_idx] = true; + Block& block = program->blocks[block_idx]; size_t pred = block.linear_preds.size(); if (pred == 0 || block.loop_nest_depth < state->loop_nest_depth) { return Operand(program->lane_mask); } else if (block.loop_nest_depth > state->loop_nest_depth) { Operand op = get_ssa(program, block_idx - 1, state, false); - assert(!state->latest[block_idx]); - state->latest[block_idx] = op.tempId(); + state->latest[block_idx] = op; return op; } else if (pred == 1 || block.kind & block_kind_loop_exit) { Operand op = get_ssa(program, block.linear_preds[0], state, false); - assert(!state->latest[block_idx]); - state->latest[block_idx] = op.tempId(); + state->latest[block_idx] = op; return op; } else if (block.kind & block_kind_loop_header && !(program->blocks[state->phi_block_idx].kind & block_kind_loop_exit)) { return Operand(program->lane_mask); } else { - unsigned res = program->allocateId(); - assert(!state->latest[block_idx]); - state->latest[block_idx] = res; + Temp res = Temp(program->allocateId(), program->lane_mask); + 
state->latest[block_idx] = Operand(res); + + Operand ops[pred]; + for (unsigned i = 0; i < pred; i++) + ops[i] = get_ssa(program, block.linear_preds[i], state, false); + + bool all_undef = true; + for (unsigned i = 0; i < pred; i++) + all_undef = all_undef && ops[i].isUndefined(); + if (all_undef) { + state->latest[block_idx] = ops[0]; + return ops[0]; + } aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, pred, 1)}; for (unsigned i = 0; i < pred; i++) - phi->operands[i] = get_ssa(program, block.linear_preds[i], state, false); - phi->definitions[0] = Definition(Temp{res, program->lane_mask}); + phi->operands[i] = ops[i]; + phi->definitions[0] = Definition(res); block.instructions.emplace(block.instructions.begin(), std::move(phi)); - return Operand(Temp(res, program->lane_mask)); + return Operand(res); } } @@ -169,6 +181,7 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block, } state->latest.resize(program->blocks.size()); + state->visited.resize(program->blocks.size()); uint64_t undef_operands = 0; for (unsigned i = 0; i < phi->operands.size(); i++) @@ -187,7 +200,8 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block, state->writes.clear(); state->needs_init = false; } - std::fill(state->latest.begin(), state->latest.end(), 0); + std::fill(state->latest.begin(), state->latest.end(), Operand(program->lane_mask)); + std::fill(state->visited.begin(), state->visited.end(), false); for (unsigned i = 0; i < phi->operands.size(); i++) { if (phi->operands[i].isUndefined()) |