diff options
author | Daniel Schürmann <[email protected]> | 2020-06-05 21:21:02 +0100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-06-09 21:25:38 +0000 |
commit | 2a51840c52d404ddbde53f987b02df3832f45a42 (patch) | |
tree | 003282802f61276f6ddddc2517cf8721239a0ef2 | |
parent | 1d6f667193d3b29d27d6721f694af290510a4e60 (diff) |
aco: skip partial copies on first iteration when lowering to hw
Helps some Detroit: Become Human shaders.
Totals from affected shaders: (VEGA)
Code Size: 47693912 -> 47670212 (-0.05 %) bytes
Instructions: 9183788 -> 9177863 (-0.06 %)
Copies: 910052 -> 904127 (-0.65 %)
Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5226>
-rw-r--r-- | src/amd/compiler/aco_lower_to_hw_instr.cpp | 42 |
1 file changed, 15 insertions, 27 deletions
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 323dfdadf98..199c6c39368 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1260,27 +1260,19 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* /* first, handle paths in the location transfer graph */ bool preserve_scc = pi->tmp_in_scc && !writes_scc; + bool skip_partial_copies = true; it = copy_map.begin(); - while (it != copy_map.end()) { - - /* try to coalesce 32-bit sgpr copies to 64-bit copies */ - if (it->second.is_used == 0 && - it->second.def.getTemp().type() == RegType::sgpr && it->second.bytes == 4 && - !it->second.op.isConstant() && it->first % 2 == it->second.op.physReg() % 2) { - - PhysReg other_def_reg = PhysReg{it->first % 2 ? it->first - 1 : it->first + 1}; - PhysReg other_op_reg = PhysReg{it->first % 2 ? it->second.op.physReg() - 1 : it->second.op.physReg() + 1}; - std::map<PhysReg, copy_operation>::iterator other = copy_map.find(other_def_reg); - - if (other != copy_map.end() && !other->second.is_used && other->second.bytes == 4 && - other->second.op.physReg() == other_op_reg && !other->second.op.isConstant()) { - std::map<PhysReg, copy_operation>::iterator to_erase = it->first % 2 ? it : other; - it = it->first % 2 ? 
other : it; - copy_map.erase(to_erase); - it->second.bytes = 8; - } + while (true) { + if (copy_map.empty()) { + ctx->program->statistics[statistic_copies] += ctx->instructions.size() - num_instructions_before; + return; + } + if (it == copy_map.end()) { + if (!skip_partial_copies) + break; + skip_partial_copies = false; + it = copy_map.begin(); } - // TODO: try to coalesce subdword copies /* on GFX6/7, we need some small workarounds as there is no * SDWA instruction to do partial register writes */ @@ -1328,8 +1320,9 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* /* find portions where the target reg is not used as operand for any other copy */ if (it->second.is_used) { - if (it->second.op.isConstant()) { - /* we have to skip constants until is_used=0 */ + if (it->second.op.isConstant() || skip_partial_copies) { + /* we have to skip constants until is_used=0. + * we also skip partial copies at the beginning to help coalescing */ ++it; continue; } @@ -1367,7 +1360,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* } bool did_copy = do_copy(ctx, bld, it->second, &preserve_scc, pi->scratch_sgpr); - + skip_partial_copies = did_copy; std::pair<PhysReg, copy_operation> copy = *it; if (it->second.is_used == 0) { @@ -1415,11 +1408,6 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* } } - if (copy_map.empty()) { - ctx->program->statistics[statistic_copies] += ctx->instructions.size() - num_instructions_before; - return; - } - /* all target regs are needed as operand somewhere which means, all entries are part of a cycle */ unsigned largest = 0; for (const std::pair<const PhysReg, copy_operation>& op : copy_map) |