diff --stat (summary of changes in this commit)
 src/compiler/nir/nir.h                 |  1 +
 src/compiler/nir/nir_opt_algebraic.py  | 35 ++++++++++++++++++++++++++++
 src/intel/compiler/brw_nir.c           |  7 +++++++
 3 files changed, 43 insertions(+), 0 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 5d29b2f64cc..c44513e5ac6 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4255,6 +4255,7 @@ bool nir_opt_access(nir_shader *shader); bool nir_opt_algebraic(nir_shader *shader); bool nir_opt_algebraic_before_ffma(nir_shader *shader); bool nir_opt_algebraic_late(nir_shader *shader); +bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader); bool nir_opt_constant_folding(nir_shader *shader); /* Try to combine a and b into a. Return true if combination was possible, diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 472db765026..76ec1acc056 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1853,8 +1853,43 @@ for op in ['ffma']: (('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, e, d)), (op, b, ('bcsel', a, c, e), d)), ] +distribute_src_mods = [ + # Try to remove some spurious negations rather than pushing them down. + (('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)), + (('ffma', ('fneg', a), ('fneg', b), c), ('ffma', a, b, c)), + (('fdot_replicated2', ('fneg', a), ('fneg', b)), ('fdot_replicated2', a, b)), + (('fdot_replicated3', ('fneg', a), ('fneg', b)), ('fdot_replicated3', a, b)), + (('fdot_replicated4', ('fneg', a), ('fneg', b)), ('fdot_replicated4', a, b)), + (('fneg', ('fneg', a)), a), + + (('fneg', ('ffma(is_used_once)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))), + (('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c)), + (('fneg', ('fadd(is_used_once)', a, b)), ('fadd', ('fneg', a), ('fneg', b))), + + # Note that fmin <-> fmax. I don't think there is a way to distribute + # fabs() into fmin or fmax. 
+ (('fneg', ('fmin(is_used_once)', a, b)), ('fmax', ('fneg', a), ('fneg', b))), + (('fneg', ('fmax(is_used_once)', a, b)), ('fmin', ('fneg', a), ('fneg', b))), + + # fdph works mostly like fdot, but to get the correct result, the negation + # must be applied to the second source. + (('fneg', ('fdph_replicated(is_used_once)', a, b)), ('fdph_replicated', a, ('fneg', b))), + (('fabs', ('fdph_replicated(is_used_once)', a, b)), ('fdph_replicated', ('fabs', a), ('fabs', b))), + + (('fneg', ('fsign(is_used_once)', a)), ('fsign', ('fneg', a))), + (('fabs', ('fsign(is_used_once)', a)), ('fsign', ('fabs', a))), +] + +for op in ['fmul', 'fdot_replicated2', 'fdot_replicated3', 'fdot_replicated4']: + distribute_src_mods.extend([ + (('fneg', (op + '(is_used_once)', a, b)), (op, ('fneg', a), b)), + (('fabs', (op + '(is_used_once)', a, b)), (op, ('fabs', a), ('fabs', b))), + ]) + print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()) print(nir_algebraic.AlgebraicPass("nir_opt_algebraic_before_ffma", before_ffma_optimizations).render()) print(nir_algebraic.AlgebraicPass("nir_opt_algebraic_late", late_optimizations).render()) +print(nir_algebraic.AlgebraicPass("nir_opt_algebraic_distribute_src_mods", + distribute_src_mods).render()) diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 8c0be0d8a26..c7e9c0e1129 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -921,6 +921,13 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, if (is_scalar) OPT(nir_lower_alu_to_scalar, NULL, NULL); + + while (OPT(nir_opt_algebraic_distribute_src_mods)) { + OPT(nir_copy_prop); + OPT(nir_opt_dce); + OPT(nir_opt_cse); + } + OPT(nir_lower_to_source_mods, nir_lower_all_source_mods); OPT(nir_copy_prop); OPT(nir_opt_dce); |