diff options
author | Samuel Pitoiset <[email protected]> | 2020-03-27 16:40:38 +0100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-03-30 14:07:43 +0000 |
commit | 3935a729d998274ba78ab70e9eb6dd7dac2c2368 (patch) | |
tree | 8912c3cfdf273042acc9877f7e79992f1e983def | |
parent | 2e92d3381988a85b2a6dcc8d8a8d7158ace9f348 (diff) |
nir/algebraic: add fexp2(fmul(flog2(a), 0.5)) -> fsqrt(a) optimization
Helps some Wolfenstein II and Wolfenstein Youngblood shaders.
pipeline-db (VEGA10/ACO):
Totals from affected shaders:
SGPRS: 17904 -> 17904 (0.00 %)
VGPRS: 14492 -> 14492 (0.00 %)
Spilled SGPRs: 20 -> 20 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 1753152 -> 1749708 (-0.20 %) bytes
Max Waves: 2581 -> 2581 (0.00 %)
pipeline-db (VEGA10/LLVM):
Totals from affected shaders:
SGPRS: 26656 -> 26656 (0.00 %)
VGPRS: 23780 -> 23780 (0.00 %)
Spilled SGPRs: 2112 -> 2112 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 2552712 -> 2549236 (-0.14 %) bytes
Max Waves: 3359 -> 3359 (0.00 %)
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Alyssa Rosenzweig <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4353>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4353>
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 3302cd8d9e1..1625d6703e2 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -787,6 +787,7 @@ optimizations.extend([
    (('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
    (('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))), ('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) + d) = a^b * c^d
+   (('~fexp2', ('fmul', ('flog2', a), 0.5)), ('fsqrt', a)),
    (('~fexp2', ('fmul', ('flog2', a), 2.0)), ('fmul', a, a)),
    (('~fexp2', ('fmul', ('flog2', a), 4.0)), ('fmul', ('fmul', a, a), ('fmul', a, a))),
    (('~fpow', a, 1.0), a),