about | summary | refs | log | tree | commit | diff | stats
path: root/src/amd
diff options
context:
space:
mode:
author: Rhys Perry <[email protected]> 2020-02-27 19:56:22 +0000
committer: Marge Bot <[email protected]> 2020-03-05 19:37:24 +0000
commit: c6e0c062daa55269661b190deaec40e9749198bc (patch)
tree: 73fcf1b9822a6113c16add3191c52d793ff012fa /src/amd
parent: 47b7f104a0aa3692e9fb202741406a0c6d9ac8ad (diff)
aco: improve control flow handling in GFX6-9 NOP pass
Fixes Detroit: Become Human hang. Also affects World of Warships.

pipeline-db (Tahiti):
Totals from affected shaders:
SGPRS: 0 -> 0 (0.00 %)
VGPRS: 0 -> 0 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 0 -> 0 (0.00 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 0 -> 0 (0.00 %)

pipeline-db (Polaris):
Totals from affected shaders:
SGPRS: 17168 -> 17168 (0.00 %)
VGPRS: 11296 -> 11296 (0.00 %)
Spilled SGPRs: 1870 -> 1870 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 1472628 -> 1473292 (0.05 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 628 -> 628 (0.00 %)

pipeline-db (Vega):
Totals from affected shaders:
SGPRS: 17168 -> 17168 (0.00 %)
VGPRS: 11296 -> 11296 (0.00 %)
Spilled SGPRs: 1870 -> 1870 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 1409716 -> 1410380 (0.05 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 0 -> 0 (0.00 %)

Max Waves is lower than it should be because of a null winsys bug.

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4004>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4004>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/compiler/aco_insert_NOPs.cpp | 21
1 files changed, 13 insertions, 8 deletions
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 4302711ba81..75dbe852174 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -225,7 +225,16 @@ int handle_raw_hazard_internal(Program *program, Block *block,
return 0;
}
- return 0;
+ int res = 0;
+
+ /* Loops require branch instructions, which count towards the wait
+ * states. So even with loops this should finish unless nops_needed is some
+ * huge value. */
+ for (unsigned lin_pred : block->linear_preds) {
+ res = std::max(res, handle_raw_hazard_internal<Valu, Vintrp, Salu>(
+ program, &program->blocks[lin_pred], nops_needed, reg, mask));
+ }
+ return res;
}
template <bool Valu, bool Vintrp, bool Salu>
@@ -763,14 +772,10 @@ void mitigate_hazards(Program *program)
void insert_NOPs(Program* program)
{
- if (program->chip_class >= GFX10) {
+ if (program->chip_class >= GFX10)
mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10>(program);
- } else {
- for (Block& block : program->blocks) {
- NOP_ctx_gfx6 ctx;
- handle_block<NOP_ctx_gfx6, handle_instruction_gfx6>(program, ctx, block);
- }
- }
+ else
+ mitigate_hazards<NOP_ctx_gfx6, handle_instruction_gfx6>(program);
}
}