summary | refs | log | tree | commit | diff | stats
path: root/src/amd/compiler
diff options
context:
space:
mode:
author: Daniel Schürmann <[email protected]> 2019-09-26 12:08:13 +0200
committer: Daniel Schürmann <[email protected]> 2019-09-30 09:44:10 +0000
commit: 1d29895e5b8f34fefc280964e65f883f7c491dfe (patch)
tree: 6f79dae54dd81f5ad1020a0b6b7484e03e2a1563 /src/amd/compiler
parent: 0fb27f1e5a1ea35e46352d7ea176d329ac90e3c2 (diff)
aco: call nir_opt_algebraic_late() exhaustively
57559 shaders in 28980 tests
Totals:
SGPRS: 2963407 -> 2959935 (-0.12 %)
VGPRS: 2014812 -> 2016328 (0.08 %)
Spilled SGPRs: 1077 -> 1077 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 10348 -> 10348 (0.00 %) dwords per thread
Code Size: 114545436 -> 114498084 (-0.04 %) bytes
LDS: 933 -> 933 (0.00 %) blocks
Max Waves: 375997 -> 375866 (-0.03 %)

Reviewed-by: Connor Abbott <[email protected]>
Diffstat (limited to 'src/amd/compiler')
-rw-r--r-- src/amd/compiler/aco_instruction_selection_setup.cpp | 19
1 files changed, 15 insertions, 4 deletions
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index a1b2e4a7e57..a32ada0613e 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -1324,13 +1324,24 @@ setup_isel_context(Program* program,
nir_copy_prop(nir);
nir_opt_constant_folding(nir);
nir_opt_algebraic(nir);
- nir_opt_algebraic_late(nir);
- nir_opt_constant_folding(nir);
+
+ /* Do late algebraic optimization to turn add(a, neg(b)) back into
+ * subs, then the mandatory cleanup after algebraic. Note that it may
+ * produce fnegs, and if so then we need to keep running to squash
+ * fneg(fneg(a)).
+ */
+ bool more_late_algebraic = true;
+ while (more_late_algebraic) {
+ more_late_algebraic = false;
+ NIR_PASS(more_late_algebraic, nir, nir_opt_algebraic_late);
+ NIR_PASS_V(nir, nir_opt_constant_folding);
+ NIR_PASS_V(nir, nir_copy_prop);
+ NIR_PASS_V(nir, nir_opt_dce);
+ NIR_PASS_V(nir, nir_opt_cse);
+ }
/* cleanup passes */
nir_lower_load_const_to_scalar(nir);
- nir_opt_cse(nir);
- nir_opt_dce(nir);
nir_opt_shrink_load(nir);
nir_move_options move_opts = (nir_move_options)(
nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | nir_move_comparisons);