summary | refs | log | tree | commit | diff | stats
path: root/src/amd/compiler/aco_spill.cpp
diff options
context:
space:
mode:
author Rhys Perry <[email protected]> 2019-09-13 16:41:00 +0100
committer Rhys Perry <[email protected]> 2019-10-23 19:11:21 +0100
commit 08d510010b7586387e363460b98e6a45bbe97164 (patch)
tree b505450ea74de3fce98048cabab34aa6c97af9cf /src/amd/compiler/aco_spill.cpp
parent 7453c1adff9d8a9e09cd7585e05c4db1c70870be (diff)
aco: increase accuracy of SGPR limits
SGPRs are allocated in groups of 16 on GFX8/GFX9. GFX10 allocates a fixed number of SGPRs and has 106 addressable SGPRs.

pipeline-db (Vega):
SGPRS: 5912 -> 6232 (5.41 %)
VGPRS: 1772 -> 1780 (0.45 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 88228 -> 87904 (-0.37 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 559 -> 571 (2.15 %)

pipeline-db (Navi):
SGPRS: 341256 -> 363384 (6.48 %)
VGPRS: 171536 -> 170960 (-0.34 %)
Spilled SGPRs: 832 -> 581 (-30.17 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 14207332 -> 14190872 (-0.12 %) bytes
LDS: 33 -> 33 (0.00 %) blocks
Max Waves: 18072 -> 18251 (0.99 %)

v2: unconditionally count vcc as an extra sgpr on GFX10+
v3: pass SGPRs rounded to 8

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Diffstat (limited to 'src/amd/compiler/aco_spill.cpp')
-rw-r--r-- src/amd/compiler/aco_spill.cpp 8
1 files changed, 3 insertions, 5 deletions
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index 92a23bb355c..56167e36d6d 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -1568,8 +1568,6 @@ void spill(Program* program, live& live_vars, const struct radv_nir_compiler_opt
return;
/* else, we check if we can improve things a bit */
- uint16_t total_sgpr_regs = options->chip_class >= GFX8 ? 800 : 512;
- uint16_t max_addressible_sgpr = program->sgpr_limit;
/* calculate target register demand */
RegisterDemand max_reg_demand;
@@ -1577,14 +1575,14 @@ void spill(Program* program, live& live_vars, const struct radv_nir_compiler_opt
max_reg_demand.update(block.register_demand);
}
- RegisterDemand target_pressure = {256, int16_t(max_addressible_sgpr)};
+ RegisterDemand target_pressure = {256, int16_t(program->sgpr_limit)};
unsigned num_waves = 1;
- int spills_to_vgpr = (max_reg_demand.sgpr - max_addressible_sgpr + 63) / 64;
+ int spills_to_vgpr = (max_reg_demand.sgpr - program->sgpr_limit + 63) / 64;
/* test if it possible to increase occupancy with little spilling */
for (unsigned num_waves_next = 2; num_waves_next <= 8; num_waves_next++) {
RegisterDemand target_pressure_next = {int16_t((256 / num_waves_next) & ~3),
- int16_t(std::min<uint16_t>(((total_sgpr_regs / num_waves_next) & ~7) - 2, max_addressible_sgpr))};
+ int16_t(get_addr_sgpr_from_waves(program, num_waves_next))};
/* Currently no vgpr spilling supported.
* Spill as many sgprs as necessary to not hinder occupancy */