aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Romanick <[email protected]>2018-03-27 22:57:07 -0700
committerIan Romanick <[email protected]>2019-05-14 11:38:23 -0700
commit32d259713b7044a25241ce377f68f71ead703e94 (patch)
treedf94cbd88860d4443bafb3b8264aab13e7e17f99
parenta7f0c57673d2c56c46c4383a7008f525102cd33b (diff)
nir/algebraic: Commute 1-fsat(a) to fsat(1-a) for all non-fmul instructions
The goal is to avoid having an extra MOV instruction to perform the saturate. Doing the subtraction first allows the saturate to be applied to the ADD instruction making the MOV unnecessary. Values generated in different block and values from non-ALU instructions (e.g., texture instructions) almost always need the extra MOV. Multiply instructions are restricted because doing this rearrangement can interfere with the generation of flrp and ffma instructions. v2: Now that the final method has been selected, squash three commits into one. All Intel platforms has similar results. (Ice Lake shown) total instructions in shared programs: 17223214 -> 17219386 (-0.02%) instructions in affected programs: 1524376 -> 1520548 (-0.25%) helped: 2686 HURT: 26 helped stats (abs) min: 1 max: 32 x̄: 1.44 x̃: 1 helped stats (rel) min: 0.03% max: 16.67% x̄: 0.54% x̃: 0.37% HURT stats (abs) min: 1 max: 2 x̄: 1.69 x̃: 2 HURT stats (rel) min: 0.33% max: 1.67% x̄: 0.54% x̃: 0.35% 95% mean confidence interval for instructions value: -1.46 -1.36 95% mean confidence interval for instructions %-change: -0.56% -0.50% Instructions are helped. total cycles in shared programs: 360811571 -> 360791896 (<.01%) cycles in affected programs: 103650214 -> 103630539 (-0.02%) helped: 1557 HURT: 675 helped stats (abs) min: 1 max: 1773 x̄: 41.44 x̃: 16 helped stats (rel) min: <.01% max: 26.77% x̄: 1.37% x̃: 0.64% HURT stats (abs) min: 1 max: 1513 x̄: 66.44 x̃: 14 HURT stats (rel) min: <.01% max: 46.16% x̄: 2.00% x̃: 0.49% 95% mean confidence interval for cycles value: -14.82 -2.81 95% mean confidence interval for cycles %-change: -0.50% -0.20% Cycles are helped. LOST: 2 GAINED: 0 Reviewed-by: Matt Turner <[email protected]> [v1] Reviewed-by: Thomas Helland <[email protected]>
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py13
-rw-r--r--src/compiler/nir/nir_search_helpers.h16
2 files changed, 24 insertions, 5 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 79058c06b97..1c7b3597c1f 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -152,11 +152,7 @@ optimizations = [
(('~fadd@32', ('fmul', a, ('fadd', 1.0, ('fneg', c ) )), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp32'),
(('~fadd@64', ('fmul', a, ('fadd', 1.0, ('fneg', c ) )), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp64'),
# These are the same as the previous three rules, but it depends on
- # 1-fsat(x) <=> fsat(1-x):
- #
- # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially
- # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1
- # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0
+ # 1-fsat(x) <=> fsat(1-x). See below.
(('~fadd@32', ('fmul', a, ('fsat', ('fadd', 1.0, ('fneg', c )))), ('fmul', b, ('fsat', c))), ('flrp', a, b, ('fsat', c)), '!options->lower_flrp32'),
(('~fadd@64', ('fmul', a, ('fsat', ('fadd', 1.0, ('fneg', c )))), ('fmul', b, ('fsat', c))), ('flrp', a, b, ('fsat', c)), '!options->lower_flrp64'),
@@ -177,6 +173,11 @@ optimizations = [
(('fdot3', ('vec3', a, 0.0, 0.0), b), ('fmul', a, b)),
(('fdot3', ('vec3', a, b, 0.0), c), ('fdot2', ('vec2', a, b), c)),
+ # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially
+ # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1
+ # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0
+ (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
+
# (a * #b + #c) << #d
# ((a * #b) << #d) + (#c << #d)
# (a * (#b << #d)) + (#c << #d)
@@ -1179,6 +1180,8 @@ late_optimizations = [
(('ior', a, a), a),
(('iand', a, a), a),
+ (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
+
(('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
(('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
(('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
index 9e03c610ece..631c65a8642 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -174,6 +174,22 @@ is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components,
}
static inline bool
+is_not_fmul(nir_alu_instr *instr, unsigned src,
+ UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
+{
+ nir_alu_instr *src_alu =
+ nir_src_as_alu_instr(instr->src[src].src);
+
+ if (src_alu == NULL)
+ return true;
+
+ if (src_alu->op == nir_op_fneg)
+ return is_not_fmul(src_alu, 0, 0, NULL);
+
+ return src_alu->op != nir_op_fmul;
+}
+
+static inline bool
is_used_once(nir_alu_instr *instr)
{
bool zero_if_use = list_empty(&instr->dest.dest.ssa.if_uses);