diff options
author | Ian Romanick <[email protected]> | 2018-08-22 19:15:15 -0700 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2019-05-06 22:52:28 -0700 |
commit | a83a6e96904a8941fce30b862b18a7c8b3a30dad (patch) | |
tree | 5d7890aadae3229af1d3125945be79953c397461 | |
parent | e00fa99b08b39b50fa8d4afe11652d9d0f395254 (diff) |
nir/algebraic: Pull common addition out of flrp arguments
v2: Augment the late optimization patterns with a couple of pre-ffma pass
patterns.
All Gen7+ platforms had similar results. (Skylake shown)
total instructions in shared programs: 15342982 -> 15342485 (<.01%)
instructions in affected programs: 56304 -> 55807 (-0.88%)
helped: 235
HURT: 0
helped stats (abs) min: 1 max: 8 x̄: 2.11 x̃: 1
helped stats (rel) min: 0.11% max: 8.82% x̄: 1.27% x̃: 0.74%
95% mean confidence interval for instructions value: -2.31 -1.92
95% mean confidence interval for instructions %-change: -1.46% -1.09%
Instructions are helped.
total cycles in shared programs: 355734740 -> 355734320 (<.01%)
cycles in affected programs: 1028807 -> 1028387 (-0.04%)
helped: 134
HURT: 104
helped stats (abs) min: 1 max: 212 x̄: 25.69 x̃: 8
helped stats (rel) min: <.01% max: 9.36% x̄: 1.33% x̃: 0.61%
HURT stats (abs) min: 1 max: 203 x̄: 29.06 x̃: 8
HURT stats (rel) min: 0.02% max: 15.76% x̄: 1.76% x̃: 0.46%
95% mean confidence interval for cycles value: -8.51 4.98
95% mean confidence interval for cycles %-change: -0.35% 0.39%
Inconclusive result (value mean confidence interval includes 0).
Sandy Bridge
total instructions in shared programs: 10886815 -> 10886390 (<.01%)
instructions in affected programs: 36883 -> 36458 (-1.15%)
helped: 147
HURT: 0
helped stats (abs) min: 1 max: 7 x̄: 2.89 x̃: 3
helped stats (rel) min: 0.35% max: 8.00% x̄: 1.60% x̃: 1.23%
95% mean confidence interval for instructions value: -3.12 -2.67
95% mean confidence interval for instructions %-change: -1.83% -1.38%
Instructions are helped.
total cycles in shared programs: 154188360 -> 154186902 (<.01%)
cycles in affected programs: 388094 -> 386636 (-0.38%)
helped: 90
HURT: 58
helped stats (abs) min: 1 max: 243 x̄: 36.80 x̃: 15
helped stats (rel) min: 0.04% max: 9.23% x̄: 1.26% x̃: 0.83%
HURT stats (abs) min: 1 max: 684 x̄: 31.97 x̃: 10
HURT stats (rel) min: 0.03% max: 13.50% x̄: 1.15% x̃: 0.51%
95% mean confidence interval for cycles value: -22.62 2.92
95% mean confidence interval for cycles %-change: -0.68% 0.05%
Inconclusive result (value mean confidence interval includes 0).
Iron Lake and GM45 had similar results. (Iron Lake shown)
total instructions in shared programs: 8221239 -> 8220357 (-0.01%)
instructions in affected programs: 54560 -> 53678 (-1.62%)
helped: 186
HURT: 0
helped stats (abs) min: 1 max: 14 x̄: 4.74 x̃: 3
helped stats (rel) min: 0.34% max: 10.77% x̄: 1.97% x̃: 1.17%
95% mean confidence interval for instructions value: -5.21 -4.28
95% mean confidence interval for instructions %-change: -2.23% -1.72%
Instructions are helped.
total cycles in shared programs: 188654442 -> 188650364 (<.01%)
cycles in affected programs: 1454384 -> 1450306 (-0.28%)
helped: 204
HURT: 0
helped stats (abs) min: 2 max: 84 x̄: 19.99 x̃: 18
helped stats (rel) min: 0.02% max: 4.69% x̄: 0.56% x̃: 0.22%
95% mean confidence interval for cycles value: -22.38 -17.60
95% mean confidence interval for cycles %-change: -0.67% -0.46%
Cycles are helped.
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 16 |
1 file changed, 16 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 55e46b04466..59ba4add43e 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -127,6 +127,15 @@ optimizations = [
    (('~flrp', a, b, 1.0), b),
    (('~flrp', a, a, b), a),
    (('~flrp', 0.0, a, b), ('fmul', a, b)),
+
+   # flrp(a, a + b, c) => a + flrp(0, b, c) => a + (b * c)
+   (('~flrp', a, ('fadd(is_used_once)', a, b), c), ('fadd', ('fmul', b, c), a)),
+   (('~flrp@32', a, ('fadd', a, b), c), ('fadd', ('fmul', b, c), a), 'options->lower_flrp32'),
+   (('~flrp@64', a, ('fadd', a, b), c), ('fadd', ('fmul', b, c), a), 'options->lower_flrp64'),
+
+   (('~flrp@32', ('fadd', a, b), ('fadd', a, c), d), ('fadd', ('flrp', b, c, d), a), 'options->lower_flrp32'),
+   (('~flrp@64', ('fadd', a, b), ('fadd', a, c), d), ('fadd', ('flrp', b, c, d), a), 'options->lower_flrp64'),
+
    (('~flrp', a, b, ('b2f', 'c@1')), ('bcsel', c, b, a), 'options->lower_flrp32'),
    (('~flrp', a, 0.0, c), ('fadd', ('fmul', ('fneg', a), c), a)),
    (('flrp@16', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp16'),
@@ -1063,6 +1072,10 @@ before_ffma_optimizations = [
    (('iadd', a, ('iadd', ('ineg', a), b)), b),
    (('~fadd', ('fneg', a), ('fadd', a, b)), b),
    (('~fadd', a, ('fadd', ('fneg', a), b)), b),
+
+   (('~flrp@32', ('fadd(is_used_once)', a, -1.0), ('fadd(is_used_once)', a, 1.0), d), ('fadd', ('flrp', -1.0, 1.0, d), a)),
+   (('~flrp@32', ('fadd(is_used_once)', a, 1.0), ('fadd(is_used_once)', a, -1.0), d), ('fadd', ('flrp', 1.0, -1.0, d), a)),
+   (('~flrp@32', ('fadd(is_used_once)', a, '#b'), ('fadd(is_used_once)', a, '#c'), d), ('fadd', ('fmul', d, ('fadd', c, ('fneg', b))), ('fadd', a, b))),
 ]
 
 # This section contains "late" optimizations that should be run after the
@@ -1086,6 +1099,9 @@ late_optimizations = [
    (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
    (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
 
+   (('~flrp@32', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)),
+   (('~flrp@64', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)),
+
    (('b2f(is_used_more_than_once)', ('inot', 'a@1')), ('bcsel', a, 0.0, 1.0)),
    (('fneg(is_used_more_than_once)', ('b2f', ('inot', 'a@1'))), ('bcsel', a, -0.0, -1.0)),
 