aboutsummaryrefslogtreecommitdiffstats
path: root/src/compiler/nir/nir_opt_algebraic.py
diff options
context:
space:
mode:
authorRhys Perry <[email protected]>2019-07-11 15:31:50 +0100
committerRhys Perry <[email protected]>2019-08-16 12:13:01 +0100
commit0a790c3019f24dcf905c1c6245827525a11f2a44 (patch)
tree23b07a8f7665ee0f5b38527514c9f705764f0248 /src/compiler/nir/nir_opt_algebraic.py
parent861c2b8d31ea3abf59062deddbbaeaa51e1583e0 (diff)
nir/algebraic: add a few masking-before-unpack optimizations
Helps some Dawn of War 3 and F1 2017 shaders with ACO: Totals from affected shaders: SGPRS: 2136 -> 2128 (-0.37 %) VGPRS: 1624 -> 1628 (0.25 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 168068 -> 164332 (-2.22 %) bytes LDS: 44 -> 44 (0.00 %) blocks Max Waves: 222 -> 221 (-0.45 %) Wait states: 0 -> 0 (0.00 %) Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Ian Romanick <[email protected]> Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src/compiler/nir/nir_opt_algebraic.py')
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py10
1 files changed, 9 insertions, 1 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 40f718e5ae2..26e2fc346ed 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -882,7 +882,15 @@ optimizations.extend([
(('ishr', 'a@16', 8), ('extract_i8', a, 1), '!options->lower_extract_byte'),
(('ishr', 'a@32', 24), ('extract_i8', a, 3), '!options->lower_extract_byte'),
(('ishr', 'a@64', 56), ('extract_i8', a, 7), '!options->lower_extract_byte'),
- (('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte')
+ (('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte'),
+
+ # Useless masking before unpacking
+ (('unpack_half_2x16_split_x', ('iand', a, 0xffff)), ('unpack_half_2x16_split_x', a)),
+ (('unpack_32_2x16_split_x', ('iand', a, 0xffff)), ('unpack_32_2x16_split_x', a)),
+ (('unpack_64_2x32_split_x', ('iand', a, 0xffffffff)), ('unpack_64_2x32_split_x', a)),
+ (('unpack_half_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_half_2x16_split_y', a)),
+ (('unpack_32_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_32_2x16_split_y', a)),
+ (('unpack_64_2x32_split_y', ('iand', a, 0xffffffff00000000)), ('unpack_64_2x32_split_y', a)),
])
# After the ('extract_u8', a, 0) pattern, above, triggers, there will be