diff options
author | Rhys Perry <[email protected]> | 2020-05-12 15:08:05 +0100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-06-10 15:05:11 +0000 |
commit | 62ea429a9935582636c87c38210408ccba0477da (patch) | |
tree | 203173aafeca0c6cd633b123b0b0f1738f30d8a7 /src | |
parent | 56345b8c610e06b2c6ccb0d0975e62f9a008e34e (diff) |
aco: prefer 4-byte aligned definitions
shader-db (Navi, fp16 enabled):
Totals from 42 (0.03% of 127638) affected shaders:
CodeSize: 811984 -> 806224 (-0.71%)
Instrs: 155733 -> 155939 (+0.13%); split: -0.04%, +0.18%
Cycles: 1982568 -> 1984400 (+0.09%); split: -0.06%, +0.15%
VMEM: 7187 -> 7121 (-0.92%); split: +0.86%, -1.78%
SMEM: 1770 -> 1769 (-0.06%)
VClause: 1475 -> 1476 (+0.07%)
Copies: 12406 -> 12606 (+1.61%); split: -0.46%, +2.07%
Branches: 5901 -> 5900 (-0.02%); split: -0.25%, +0.24%
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5040>
Diffstat (limited to 'src')
-rw-r--r-- | src/amd/compiler/aco_register_allocation.cpp | 60 |
1 files changed, 30 insertions, 30 deletions
```diff
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 6a1e2b78c56..a198c9312c4 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -567,38 +567,9 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
    uint32_t lb = info.lb;
    uint32_t ub = info.ub;
    uint32_t size = info.size;
-   uint32_t stride = info.stride;
+   uint32_t stride = info.rc.is_subdword() ? DIV_ROUND_UP(info.stride, 4) : info.stride;
    RegClass rc = info.rc;
 
-   if (rc.is_subdword()) {
-      for (std::pair<uint32_t, std::array<uint32_t, 4>> entry : reg_file.subdword_regs) {
-         assert(reg_file[entry.first] == 0xF0000000);
-         if (lb > entry.first || entry.first >= ub)
-            continue;
-
-         for (unsigned i = 0; i < 4; i+= stride) {
-            if (entry.second[i] != 0)
-               continue;
-
-            bool reg_found = true;
-            for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++)
-               reg_found &= entry.second[i + j] == 0;
-
-            /* check neighboring reg if needed */
-            reg_found &= ((int)i <= 4 - (int)rc.bytes() || reg_file[entry.first + 1] == 0);
-            if (reg_found) {
-               PhysReg res{entry.first};
-               res.reg_b += i;
-               adjust_max_used_regs(ctx, rc, entry.first);
-               return {res, true};
-            }
-         }
-      }
-
-      stride = 1; /* stride in full registers */
-      rc = info.rc = RegClass(RegType::vgpr, size);
-   }
-
    if (stride == 1) {
 
       for (unsigned stride = 8; stride > 1; stride /= 2) {
@@ -689,6 +660,35 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
       reg_lo += stride;
    }
 
+   /* do this late because using the upper bytes of a register can require
+    * larger instruction encodings or copies
+    * TODO: don't do this in situations where it doesn't benefit */
+   if (rc.is_subdword()) {
+      for (std::pair<uint32_t, std::array<uint32_t, 4>> entry : reg_file.subdword_regs) {
+         assert(reg_file[entry.first] == 0xF0000000);
+         if (lb > entry.first || entry.first >= ub)
+            continue;
+
+         for (unsigned i = 0; i < 4; i+= info.stride) {
+            if (entry.second[i] != 0)
+               continue;
+
+            bool reg_found = true;
+            for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++)
+               reg_found &= entry.second[i + j] == 0;
+
+            /* check neighboring reg if needed */
+            reg_found &= ((int)i <= 4 - (int)rc.bytes() || reg_file[entry.first + 1] == 0);
+            if (reg_found) {
+               PhysReg res{entry.first};
+               res.reg_b += i;
+               adjust_max_used_regs(ctx, rc, entry.first);
+               return {res, true};
+            }
+         }
+      }
+   }
+
    return {{}, false};
 }
 
```