diff options
author | Ian Romanick <[email protected]> | 2018-03-27 22:57:07 -0700 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2019-05-14 11:38:23 -0700 |
commit | 32d259713b7044a25241ce377f68f71ead703e94 (patch) | |
tree | df94cbd88860d4443bafb3b8264aab13e7e17f99 | |
parent | a7f0c57673d2c56c46c4383a7008f525102cd33b (diff) |
nir/algebraic: Commute 1-fsat(a) to fsat(1-a) for all non-fmul instructions
The goal is to avoid having an extra MOV instruction to perform the
saturate. Doing the subtraction first allows the saturate to be applied
to the ADD instruction making the MOV unnecessary. Values generated in
different block and values from non-ALU instructions (e.g., texture
instructions) almost always need the extra MOV.
Multiply instructions are restricted because doing this rearrangement
can interfere with the generation of flrp and ffma instructions.
v2: Now that the final method has been selected, squash three commits
into one.
All Intel platforms has similar results. (Ice Lake shown)
total instructions in shared programs: 17223214 -> 17219386 (-0.02%)
instructions in affected programs: 1524376 -> 1520548 (-0.25%)
helped: 2686
HURT: 26
helped stats (abs) min: 1 max: 32 x̄: 1.44 x̃: 1
helped stats (rel) min: 0.03% max: 16.67% x̄: 0.54% x̃: 0.37%
HURT stats (abs) min: 1 max: 2 x̄: 1.69 x̃: 2
HURT stats (rel) min: 0.33% max: 1.67% x̄: 0.54% x̃: 0.35%
95% mean confidence interval for instructions value: -1.46 -1.36
95% mean confidence interval for instructions %-change: -0.56% -0.50%
Instructions are helped.
total cycles in shared programs: 360811571 -> 360791896 (<.01%)
cycles in affected programs: 103650214 -> 103630539 (-0.02%)
helped: 1557
HURT: 675
helped stats (abs) min: 1 max: 1773 x̄: 41.44 x̃: 16
helped stats (rel) min: <.01% max: 26.77% x̄: 1.37% x̃: 0.64%
HURT stats (abs) min: 1 max: 1513 x̄: 66.44 x̃: 14
HURT stats (rel) min: <.01% max: 46.16% x̄: 2.00% x̃: 0.49%
95% mean confidence interval for cycles value: -14.82 -2.81
95% mean confidence interval for cycles %-change: -0.50% -0.20%
Cycles are helped.
LOST: 2
GAINED: 0
Reviewed-by: Matt Turner <[email protected]> [v1]
Reviewed-by: Thomas Helland <[email protected]>
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 13 | ||||
-rw-r--r-- | src/compiler/nir/nir_search_helpers.h | 16 |
2 files changed, 24 insertions, 5 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 79058c06b97..1c7b3597c1f 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -152,11 +152,7 @@ optimizations = [ (('~fadd@32', ('fmul', a, ('fadd', 1.0, ('fneg', c ) )), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp32'), (('~fadd@64', ('fmul', a, ('fadd', 1.0, ('fneg', c ) )), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp64'), # These are the same as the previous three rules, but it depends on - # 1-fsat(x) <=> fsat(1-x): - # - # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially - # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1 - # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0 + # 1-fsat(x) <=> fsat(1-x). See below. (('~fadd@32', ('fmul', a, ('fsat', ('fadd', 1.0, ('fneg', c )))), ('fmul', b, ('fsat', c))), ('flrp', a, b, ('fsat', c)), '!options->lower_flrp32'), (('~fadd@64', ('fmul', a, ('fsat', ('fadd', 1.0, ('fneg', c )))), ('fmul', b, ('fsat', c))), ('flrp', a, b, ('fsat', c)), '!options->lower_flrp64'), @@ -177,6 +173,11 @@ optimizations = [ (('fdot3', ('vec3', a, 0.0, 0.0), b), ('fmul', a, b)), (('fdot3', ('vec3', a, b, 0.0), c), ('fdot2', ('vec2', a, b), c)), + # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially + # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1 + # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0 + (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))), + # (a * #b + #c) << #d # ((a * #b) << #d) + (#c << #d) # (a * (#b << #d)) + (#c << #d) @@ -1179,6 +1180,8 @@ late_optimizations = [ (('ior', a, a), a), (('iand', a, a), a), + (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))), + (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'), (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'), (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'), diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 9e03c610ece..631c65a8642 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -174,6 +174,22 @@ is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, } static inline bool +is_not_fmul(nir_alu_instr *instr, unsigned src, + UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +{ + nir_alu_instr *src_alu = + nir_src_as_alu_instr(instr->src[src].src); + + if (src_alu == NULL) + return true; + + if (src_alu->op == nir_op_fneg) + return is_not_fmul(src_alu, 0, 0, NULL); + + return src_alu->op != nir_op_fmul; +} + +static inline bool is_used_once(nir_alu_instr *instr) { bool zero_if_use = list_empty(&instr->dest.dest.ssa.if_uses); |