diff options
author | Ian Romanick <[email protected]> | 2019-08-09 12:48:27 -0700 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2019-08-29 13:15:53 -0700 |
commit | 636da1243346e4e2a5aaf79bac65850884a9b859 (patch) | |
tree | 8b2b27aa08c1e57d33e24c819636d551be5de1be | |
parent | 7dba7df5e577b94e009848a2ca3e0b0a41629fe9 (diff) |
nir/range-analysis: Range tracking for fpow
One shader from Metro Last Light and the rest from Rochard. In the
Rochard cases, something like:
min(1.0, max(pow(saturate(x), y), z))
was transformed to
saturate(max(pow(saturate(x), y), z))
because the result of the pow must be >= 0.
The Metro Last Light case was similar. An instance of
min(pow(abs(x), y), 1.0)
became
saturate(pow(abs(x), y))
v2: Fix some comments. Suggested by Caio.
v3: Fix setting is_integral when the exponent might be negative. See
also Mesa MR !1778.
Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
All Intel platforms had similar results. (Ice Lake shown)
total instructions in shared programs: 16280670 -> 16280659 (<.01%)
instructions in affected programs: 1130 -> 1119 (-0.97%)
helped: 11
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.72% max: 1.43% x̄: 1.03% x̃: 0.97%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -1.19% -0.86%
Instructions are helped.
total cycles in shared programs: 367168430 -> 367168270 (<.01%)
cycles in affected programs: 10281 -> 10121 (-1.56%)
helped: 10
HURT: 1
helped stats (abs) min: 16 max: 18 x̄: 17.00 x̃: 17
helped stats (rel) min: 1.31% max: 2.43% x̄: 1.79% x̃: 1.70%
HURT stats (abs) min: 10 max: 10 x̄: 10.00 x̃: 10
HURT stats (rel) min: 3.10% max: 3.10% x̄: 3.10% x̃: 3.10%
95% mean confidence interval for cycles value: -20.06 -9.04
95% mean confidence interval for cycles %-change: -2.36% -0.32%
Cycles are helped.
-rw-r--r-- | src/compiler/nir/nir_range_analysis.c | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 6dfaea167ac..7fbf4217239 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -31,6 +31,12 @@
  * the result.
  */
 
+static bool
+is_not_negative(enum ssa_ranges r)
+{
+   return r == gt_zero || r == ge_zero || r == eq_zero;
+}
+
 static void *
 pack_data(const struct ssa_result_range r)
 {
@@ -722,6 +728,66 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
       r = (struct ssa_result_range){le_zero, false};
       break;
 
+   case nir_op_fpow: {
+      /* Due to flush-to-zero semantics of floating-point numbers with very
+       * small magnitudes, we can never really be sure a result will be
+       * non-zero.
+       *
+       * NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man
+       * page for that function says:
+       *
+       *    If y is 0, the result is 1.0 (even if x is a NaN).
+       *
+       * gt_zero: pow(*, eq_zero)
+       *        | pow(eq_zero, lt_zero)   # 0^-y = +inf
+       *        | pow(eq_zero, le_zero)   # 0^-y = +inf or 0^0 = 1.0
+       *        ;
+       *
+       * eq_zero: pow(eq_zero, gt_zero)
+       *        ;
+       *
+       * ge_zero: pow(gt_zero, gt_zero)
+       *        | pow(gt_zero, ge_zero)
+       *        | pow(gt_zero, lt_zero)
+       *        | pow(gt_zero, le_zero)
+       *        | pow(gt_zero, ne_zero)
+       *        | pow(gt_zero, unknown)
+       *        | pow(ge_zero, gt_zero)
+       *        | pow(ge_zero, ge_zero)
+       *        | pow(ge_zero, lt_zero)
+       *        | pow(ge_zero, le_zero)
+       *        | pow(ge_zero, ne_zero)
+       *        | pow(ge_zero, unknown)
+       *        | pow(eq_zero, ge_zero)   # 0^0 = 1.0 or 0^+y = 0.0
+       *        | pow(eq_zero, ne_zero)   # 0^-y = +inf or 0^+y = 0.0
+       *        | pow(eq_zero, unknown)   # union of all other y cases
+       *        ;
+       *
+       * All other cases are unknown.
+       *
+       * We could do better if the right operand is a constant, integral
+       * value.
+       */
+      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
+         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
+         /* unknown */ { _______, _______, _______, _______, _______, _______, gt_zero },
+         /* lt_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
+         /* le_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
+         /* gt_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
+         /* ge_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
+         /* ne_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
+         /* eq_zero */ { ge_zero, gt_zero, gt_zero, eq_zero, ge_zero, ge_zero, gt_zero },
+      };
+
+      const struct ssa_result_range left = analyze_expression(alu, 0, ht);
+      const struct ssa_result_range right = analyze_expression(alu, 1, ht);
+
+      r.is_integral = left.is_integral && right.is_integral &&
+                      is_not_negative(right.range);
+      r.range = table[left.range][right.range];
+      break;
+   }
+
    case nir_op_ffma: {
       const struct ssa_result_range first = analyze_expression(alu, 0, ht);
       const struct ssa_result_range second = analyze_expression(alu, 1, ht);