aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSamuel Pitoiset <[email protected]>2020-03-27 16:40:38 +0100
committerMarge Bot <[email protected]>2020-03-30 14:07:43 +0000
commit3935a729d998274ba78ab70e9eb6dd7dac2c2368 (patch)
tree8912c3cfdf273042acc9877f7e79992f1e983def
parent2e92d3381988a85b2a6dcc8d8a8d7158ace9f348 (diff)
nir/algebraic: add fexp2(fmul(flog2(a), 0.5) -> fsqrt(a) optimization
Helps some Wolfenstein II and Wolfenstein Youngblood shaders. pipeline-db (VEGA10/ACO): Totals from affected shaders: SGPRS: 17904 -> 17904 (0.00 %) VGPRS: 14492 -> 14492 (0.00 %) Spilled SGPRs: 20 -> 20 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Code Size: 1753152 -> 1749708 (-0.20 %) bytes Max Waves: 2581 -> 2581 (0.00 %) pipeline-db (VEGA10/LLVM): Totals from affected shaders: SGPRS: 26656 -> 26656 (0.00 %) VGPRS: 23780 -> 23780 (0.00 %) Spilled SGPRs: 2112 -> 2112 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Code Size: 2552712 -> 2549236 (-0.14 %) bytes Max Waves: 3359 -> 3359 (0.00 %) Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Alyssa Rosenzweig <[email protected]> Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4353> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4353>
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py1
1 files changed, 1 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 3302cd8d9e1..1625d6703e2 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -787,6 +787,7 @@ optimizations.extend([
(('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
(('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) + d) = a^b * c^d
+ (('~fexp2', ('fmul', ('flog2', a), 0.5)), ('fsqrt', a)),
(('~fexp2', ('fmul', ('flog2', a), 2.0)), ('fmul', a, a)),
(('~fexp2', ('fmul', ('flog2', a), 4.0)), ('fmul', ('fmul', a, a), ('fmul', a, a))),
(('~fpow', a, 1.0), a),