aboutsummaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
authorRhys Perry <[email protected]>2020-07-10 16:31:31 +0100
committerMarge Bot <[email protected]>2020-07-10 22:36:14 +0000
commit19ca34ed270e20ed9ba02e3ea780f057eca2bae1 (patch)
tree8056956fc17762ef7de4e43bd126384c2d438b3d /src/amd
parent9a089baff1af757b1c0f033f4bb16cb2c8864271 (diff)
aco: don't create phis with undef operands in the boolean phi pass
We can create better merge code if we pass on undef. fossil-db (Navi): Totals from 1208 (0.89% of 135946) affected shaders: SGPRs: 66864 -> 66200 (-0.99%); split: -1.04%, +0.05% SpillSGPRs: 1179 -> 1156 (-1.95%) CodeSize: 6516672 -> 6469564 (-0.72%); split: -0.76%, +0.04% Instrs: 1232680 -> 1220859 (-0.96%); split: -0.97%, +0.01% Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3388>
Diffstat (limited to 'src/amd')
-rw-r--r--src/amd/compiler/aco_lower_phis.cpp42
1 files changed, 28 insertions, 14 deletions
diff --git a/src/amd/compiler/aco_lower_phis.cpp b/src/amd/compiler/aco_lower_phis.cpp
index ad0c620a69f..923ec8bdf72 100644
--- a/src/amd/compiler/aco_lower_phis.cpp
+++ b/src/amd/compiler/aco_lower_phis.cpp
@@ -44,7 +44,8 @@ struct ssa_state {
unsigned phi_block_idx;
unsigned loop_nest_depth;
std::map<unsigned, unsigned> writes;
- std::vector<unsigned> latest;
+ std::vector<Operand> latest;
+ std::vector<bool> visited;
};
Operand get_ssa(Program *program, unsigned block_idx, ssa_state *state, bool before_write)
@@ -53,39 +54,50 @@ Operand get_ssa(Program *program, unsigned block_idx, ssa_state *state, bool bef
auto it = state->writes.find(block_idx);
if (it != state->writes.end())
return Operand(Temp(it->second, program->lane_mask));
- if (state->latest[block_idx])
- return Operand(Temp(state->latest[block_idx], program->lane_mask));
+ if (state->visited[block_idx])
+ return state->latest[block_idx];
}
+ state->visited[block_idx] = true;
+
Block& block = program->blocks[block_idx];
size_t pred = block.linear_preds.size();
if (pred == 0 || block.loop_nest_depth < state->loop_nest_depth) {
return Operand(program->lane_mask);
} else if (block.loop_nest_depth > state->loop_nest_depth) {
Operand op = get_ssa(program, block_idx - 1, state, false);
- assert(!state->latest[block_idx]);
- state->latest[block_idx] = op.tempId();
+ state->latest[block_idx] = op;
return op;
} else if (pred == 1 || block.kind & block_kind_loop_exit) {
Operand op = get_ssa(program, block.linear_preds[0], state, false);
- assert(!state->latest[block_idx]);
- state->latest[block_idx] = op.tempId();
+ state->latest[block_idx] = op;
return op;
} else if (block.kind & block_kind_loop_header &&
!(program->blocks[state->phi_block_idx].kind & block_kind_loop_exit)) {
return Operand(program->lane_mask);
} else {
- unsigned res = program->allocateId();
- assert(!state->latest[block_idx]);
- state->latest[block_idx] = res;
+ Temp res = Temp(program->allocateId(), program->lane_mask);
+ state->latest[block_idx] = Operand(res);
+
+ Operand ops[pred];
+ for (unsigned i = 0; i < pred; i++)
+ ops[i] = get_ssa(program, block.linear_preds[i], state, false);
+
+ bool all_undef = true;
+ for (unsigned i = 0; i < pred; i++)
+ all_undef = all_undef && ops[i].isUndefined();
+ if (all_undef) {
+ state->latest[block_idx] = ops[0];
+ return ops[0];
+ }
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, pred, 1)};
for (unsigned i = 0; i < pred; i++)
- phi->operands[i] = get_ssa(program, block.linear_preds[i], state, false);
- phi->definitions[0] = Definition(Temp{res, program->lane_mask});
+ phi->operands[i] = ops[i];
+ phi->definitions[0] = Definition(res);
block.instructions.emplace(block.instructions.begin(), std::move(phi));
- return Operand(Temp(res, program->lane_mask));
+ return Operand(res);
}
}
@@ -169,6 +181,7 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block,
}
state->latest.resize(program->blocks.size());
+ state->visited.resize(program->blocks.size());
uint64_t undef_operands = 0;
for (unsigned i = 0; i < phi->operands.size(); i++)
@@ -187,7 +200,8 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block,
state->writes.clear();
state->needs_init = false;
}
- std::fill(state->latest.begin(), state->latest.end(), 0);
+ std::fill(state->latest.begin(), state->latest.end(), Operand(program->lane_mask));
+ std::fill(state->visited.begin(), state->visited.end(), false);
for (unsigned i = 0; i < phi->operands.size(); i++) {
if (phi->operands[i].isUndefined())