diff options
author | Rhys Perry <[email protected]> | 2019-10-09 15:27:07 +0100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-01-29 14:30:33 +0000 |
commit | 1f72857739beed55276f263f49b3802c336b8c58 (patch) | |
tree | f821a7ad175afdf7847c92ea2e2a812754233713 | |
parent | 5476d1818300835d120c21bcd5bd5967f5b66c84 (diff) |
nir/algebraic: add some half packing optimizations
pipeline-db (ACO):
Totals from affected shaders:
SGPRS: 29200 -> 29200 (0.00 %)
VGPRS: 17372 -> 17372 (0.00 %)
Spilled SGPRs: 105 -> 105 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 1406576 -> 1389256 (-1.23 %) bytes
LDS: 83 -> 83 (0.00 %) blocks
Max Waves: 3976 -> 3976 (0.00 %)
pipeline-db (LLVM):
Totals from affected shaders:
SGPRS: 21320 -> 21320 (0.00 %)
VGPRS: 17056 -> 17036 (-0.12 %)
Spilled SGPRs: 22 -> 22 (0.00 %)
Spilled VGPRs: 503 -> 487 (-3.18 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 396 -> 396 (0.00 %) dwords per thread
Code Size: 1441244 -> 1423292 (-1.25 %) bytes
LDS: 463 -> 463 (0.00 %) blocks
Max Waves: 3609 -> 3611 (0.06 %)
v2: add pattern for ishr
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Connor Abbott <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/2271>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/2271>
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index fd0007bb54d..f9fc119505f 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -935,6 +935,15 @@ optimizations.extend([ (('unpack_half_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_half_2x16_split_y', a)), (('unpack_32_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_32_2x16_split_y', a)), (('unpack_64_2x32_split_y', ('iand', a, 0xffffffff00000000)), ('unpack_64_2x32_split_y', a)), + + # Optimize half packing + (('ishl', ('pack_half_2x16', ('vec2', a, 0)), 16), ('pack_half_2x16', ('vec2', 0, a))), + (('ishr', ('pack_half_2x16', ('vec2', 0, a)), 16), ('pack_half_2x16', ('vec2', a, 0))), + + (('iadd', ('pack_half_2x16', ('vec2', a, 0)), ('pack_half_2x16', ('vec2', 0, b))), + ('pack_half_2x16', ('vec2', a, b))), + (('ior', ('pack_half_2x16', ('vec2', a, 0)), ('pack_half_2x16', ('vec2', 0, b))), + ('pack_half_2x16', ('vec2', a, b))), ]) # After the ('extract_u8', a, 0) pattern, above, triggers, there will be |