diff options
author | Rhys Perry <[email protected]> | 2020-02-21 12:23:28 +0000 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-03-05 20:18:34 +0000 |
commit | b088a4b113f530ef8c1ad07df33b8fca8586c5d1 (patch) | |
tree | d0cced2c8861f6e78cf590ade6fcaa302e20afcf /src | |
parent | c6e0c062daa55269661b190deaec40e9749198bc (diff) |
aco: only reserve sgprs for vcc if it's used
pipeline-db (Vega):
Totals:
SGPRS: 5186302 -> 5075616 (-2.13 %)
VGPRS: 3704580 -> 3704580 (0.00 %)
Spilled SGPRs: 144859 -> 144859 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Scratch size: 4124 -> 4124 (0.00 %) dwords per thread
Code Size: 247315944 -> 247315944 (0.00 %) bytes
LDS: 1311 -> 1311 (0.00 %) blocks
Max Waves: 674560 -> 674562 (0.00 %)
Totals from affected shaders:
SGPRS: 536992 -> 426306 (-20.61 %)
VGPRS: 356404 -> 356404 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 8498748 -> 8498748 (0.00 %) bytes
LDS: 8 -> 8 (0.00 %) blocks
Max Waves: 113832 -> 113834 (0.00 %)
There are some small code size changes in a few Rise of the Tomb Raider (RotTR)
shaders and a small increase in max_waves in two Detroit: Become Human shaders.
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3906>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3906>
Diffstat (limited to 'src')
-rw-r--r-- | src/amd/compiler/aco_instruction_selection_setup.cpp | 3 |
-rw-r--r-- | src/amd/compiler/aco_live_var_analysis.cpp | 16 |
-rw-r--r-- | src/amd/compiler/aco_validate.cpp | 4 |
3 files changed, 16 insertions, 7 deletions
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 12c2910a8a0..b3c7ff059ad 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -1007,9 +1007,6 @@ setup_isel_context(Program* program, program->sgpr_limit = 104; } - /* TODO: we don't have to allocate VCC if we don't need it */ - program->needs_vcc = true; - calc_min_waves(program); program->vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves); program->sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves); diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 1f02e67bc1a..eb965e4e05c 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -87,6 +87,8 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, if (!definition.isTemp()) { continue; } + if ((definition.isFixed() || definition.hasHint()) && definition.physReg() == vcc) + program->needs_vcc = true; const Temp temp = definition.getTemp(); size_t n = 0; @@ -120,9 +122,10 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, for (unsigned i = 0; i < insn->operands.size(); ++i) { Operand& operand = insn->operands[i]; - if (!operand.isTemp()) { + if (!operand.isTemp()) continue; - } + if (operand.isFixed() && operand.physReg() == vcc) + program->needs_vcc = true; const Temp temp = operand.getTemp(); const bool inserted = temp.is_linear() ? 
live_sgprs.insert(temp).second @@ -161,6 +164,8 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, assert(is_phi(insn)); assert(insn->definitions.size() == 1 && insn->definitions[0].isTemp()); Definition& definition = insn->definitions[0]; + if ((definition.isFixed() || definition.hasHint()) && definition.physReg() == vcc) + program->needs_vcc = true; const Temp temp = definition.getTemp(); size_t n = 0; @@ -205,9 +210,10 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, : block->linear_preds; for (unsigned i = 0; i < preds.size(); ++i) { Operand &operand = insn->operands[i]; - if (!operand.isTemp()) { + if (!operand.isTemp()) continue; - } + if (operand.isFixed() && operand.physReg() == vcc) + program->needs_vcc = true; /* check if we changed an already processed block */ const bool inserted = live_temps[preds[i]].insert(operand.getTemp()).second; if (inserted) { @@ -364,6 +370,8 @@ live live_var_analysis(Program* program, std::vector<uint16_t> phi_sgpr_ops(program->blocks.size()); RegisterDemand new_demand; + program->needs_vcc = false; + /* this implementation assumes that the block idx corresponds to the block's position in program->blocks vector */ for (Block& block : program->blocks) worklist.insert(block.index); diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index a479083a54c..0e9b6c20ab0 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -390,6 +390,8 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio if ((op.getTemp().type() == RegType::vgpr && op.physReg() + op.size() > 256 + program->config->num_vgprs) || (op.getTemp().type() == RegType::sgpr && op.physReg() + op.size() > program->config->num_sgprs && op.physReg() < program->sgpr_limit)) err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an out-of-bounds register assignment", i); + if (op.physReg() == vcc 
&& !program->needs_vcc) + err |= ra_fail(output, loc, Location(), "Operand %d fixed to vcc but needs_vcc=false", i); if (!assignments[op.tempId()].firstloc.block) assignments[op.tempId()].firstloc = loc; if (!assignments[op.tempId()].defloc.block) @@ -407,6 +409,8 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio if ((def.getTemp().type() == RegType::vgpr && def.physReg() + def.size() > 256 + program->config->num_vgprs) || (def.getTemp().type() == RegType::sgpr && def.physReg() + def.size() > program->config->num_sgprs && def.physReg() < program->sgpr_limit)) err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d has an out-of-bounds register assignment", i); + if (def.physReg() == vcc && !program->needs_vcc) + err |= ra_fail(output, loc, Location(), "Definition %d fixed to vcc but needs_vcc=false", i); if (!assignments[def.tempId()].firstloc.block) assignments[def.tempId()].firstloc = loc; assignments[def.tempId()].defloc = loc; |