about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/amd/compiler/aco_register_allocation.cpp 60
1 files changed, 30 insertions, 30 deletions
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 6a1e2b78c56..a198c9312c4 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -567,38 +567,9 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
uint32_t lb = info.lb;
uint32_t ub = info.ub;
uint32_t size = info.size;
- uint32_t stride = info.stride;
+ uint32_t stride = info.rc.is_subdword() ? DIV_ROUND_UP(info.stride, 4) : info.stride;
RegClass rc = info.rc;
- if (rc.is_subdword()) {
- for (std::pair<uint32_t, std::array<uint32_t, 4>> entry : reg_file.subdword_regs) {
- assert(reg_file[entry.first] == 0xF0000000);
- if (lb > entry.first || entry.first >= ub)
- continue;
-
- for (unsigned i = 0; i < 4; i+= stride) {
- if (entry.second[i] != 0)
- continue;
-
- bool reg_found = true;
- for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++)
- reg_found &= entry.second[i + j] == 0;
-
- /* check neighboring reg if needed */
- reg_found &= ((int)i <= 4 - (int)rc.bytes() || reg_file[entry.first + 1] == 0);
- if (reg_found) {
- PhysReg res{entry.first};
- res.reg_b += i;
- adjust_max_used_regs(ctx, rc, entry.first);
- return {res, true};
- }
- }
- }
-
- stride = 1; /* stride in full registers */
- rc = info.rc = RegClass(RegType::vgpr, size);
- }
-
if (stride == 1) {
for (unsigned stride = 8; stride > 1; stride /= 2) {
@@ -689,6 +660,35 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
reg_lo += stride;
}
+ /* do this late because using the upper bytes of a register can require
+ * larger instruction encodings or copies
+ * TODO: don't do this in situations where it doesn't benefit */
+ if (rc.is_subdword()) {
+ for (std::pair<uint32_t, std::array<uint32_t, 4>> entry : reg_file.subdword_regs) {
+ assert(reg_file[entry.first] == 0xF0000000);
+ if (lb > entry.first || entry.first >= ub)
+ continue;
+
+ for (unsigned i = 0; i < 4; i+= info.stride) {
+ if (entry.second[i] != 0)
+ continue;
+
+ bool reg_found = true;
+ for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++)
+ reg_found &= entry.second[i + j] == 0;
+
+ /* check neighboring reg if needed */
+ reg_found &= ((int)i <= 4 - (int)rc.bytes() || reg_file[entry.first + 1] == 0);
+ if (reg_found) {
+ PhysReg res{entry.first};
+ res.reg_b += i;
+ adjust_max_used_regs(ctx, rc, entry.first);
+ return {res, true};
+ }
+ }
+ }
+ }
+
return {{}, false};
}