diff options
author | Rhys Perry <[email protected]> | 2019-10-09 15:03:45 +0100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-01-29 14:30:33 +0000 |
commit | 5476d1818300835d120c21bcd5bd5967f5b66c84 (patch) | |
tree | 278668ac6f58916ded284b916a794cf8f8c0555d /src | |
parent | 6aecc316c000c343b25963c1356525f95ea6cafe (diff) |
nir/algebraic: add patterns for a >> #b << #b
Fixes compilation of a Battlefront 2 shader with ACO by removing VGPR
spilling. The reassociation makes it worse on LLVM though.
pipeline-db (ACO):
Totals from affected shaders:
SGPRS: 10704 -> 10688 (-0.15 %)
VGPRS: 18736 -> 18528 (-1.11 %)
Spilled SGPRs: 70 -> 70 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 909696 -> 885796 (-2.63 %) bytes
LDS: 225 -> 225 (0.00 %) blocks
Max Waves: 1115 -> 1129 (1.26 %)
pipeline-db (LLVM):
Totals from affected shaders:
SGPRS: 8472 -> 8424 (-0.57 %)
VGPRS: 14284 -> 14368 (0.59 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 442 -> 503 (13.80 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 268 -> 396 (47.76 %) dwords per thread
Code Size: 862568 -> 853028 (-1.11 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 971 -> 964 (-0.72 %)
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Connor Abbott <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/2271>
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 16 | ||||
-rw-r--r-- | src/compiler/nir/nir_search_helpers.h | 26 |
2 files changed, 42 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 7b9a6a8e45d..fd0007bb54d 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -255,6 +255,22 @@ for s in [8, 16, 32, 64]: (ishr, a, ('imin', ('iadd', ('iand', b, mask), ('iand', c, mask)), s - 1))), ]) +# Optimize a pattern of address calculation created by DXVK where the offset is +# divided by 4 and then multiplied by 4. This can be turned into an iand and the +# additions before can be reassociated to CSE the iand instruction. +for log2 in range(1, 7): # powers of two from 2 to 64 + v = 1 << log2 + mask = 0xffffffff & ~(v - 1) + b_is_multiple = '#b(is_unsigned_multiple_of_{})'.format(v) + + optimizations.extend([ + # 'a >> #b << #b' -> 'a & ~((1 << #b) - 1)' + (('ishl@32', ('ushr@32', a, log2), log2), ('iand', a, mask)), + + # Reassociate for improved CSE + (('iand@32', ('iadd@32', a, b_is_multiple), mask), ('iadd', ('iand', a, mask), b)), + ]) + optimizations.extend([ # This is common for address calculations. Reassociating may enable the # 'a<<c' to be CSE'd. 
It also helps architectures that have an ISHLADD diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 0c2f631ac39..abfd660ae79 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -90,6 +90,32 @@ is_neg_power_of_two(UNUSED struct hash_table *ht, nir_alu_instr *instr, return true; } +#define MULTIPLE(test) \ +static inline bool \ +is_unsigned_multiple_of_ ## test(UNUSED struct hash_table *ht, nir_alu_instr *instr, \ + unsigned src, unsigned num_components, \ + const uint8_t *swizzle) \ +{ \ + /* only constant srcs: */ \ + if (!nir_src_is_const(instr->src[src].src)) \ + return false; \ + \ + for (unsigned i = 0; i < num_components; i++) { \ + uint64_t val = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]); \ + if (val % test != 0) \ + return false; \ + } \ + \ + return true; \ +} + +MULTIPLE(2) +MULTIPLE(4) +MULTIPLE(8) +MULTIPLE(16) +MULTIPLE(32) +MULTIPLE(64) + static inline bool is_zero_to_one(UNUSED struct hash_table *ht, nir_alu_instr *instr, unsigned src, unsigned num_components, |