author | Ian Romanick <[email protected]> | 2019-11-27 16:26:03 -0800 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-01-23 00:18:57 +0000 |
commit | b065d8fb8cf55373bfdd80994417f1ac60976158 (patch) | |
tree | da8470b1c420d12537c28ca2fcc93599ff31b661 | /src/compiler |
parent | c57338b924710b93193f921cd1e95d6de6b398ef (diff) | |
nir/algebraic: Optimize some 64-bit integer comparisons involving zero
I noticed that we can do better for these kinds of comparisons while
working on the lowering for iadd_sat@64 and isub_sat@64. This
eliminated 11 instructions from the fs-addSaturate-int64.shader_test.
My hope is that this will improve the run-time of int64 tests on Ice
Lake. I have no data to support or refute this.
Unsurprisingly, no changes on shader-db.
v2: Condition the min and max patterns with nir_lower_minmax64.
Suggested by Caio. Very long discussion in the MR. :)
Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/767>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/767>
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 18 |
1 file changed, 18 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index c52522379c6..7b9a6a8e45d 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1063,6 +1063,24 @@ optimizations.extend([
                                 0x7fffffffffffffff)),
     '(options->lower_int64_options & nir_lower_iadd64) != 0'),
 
+   # These are done here instead of in the backend because the int64 lowering
+   # pass will make a mess of the patterns.  The first patterns are
+   # conditioned on nir_lower_minmax64 because it was not clear that it was
+   # always an improvement on platforms that have real int64 support.  No
+   # shaders in shader-db hit this, so it was hard to say one way or the
+   # other.
+   (('ilt', ('imax(is_used_once)', 'a@64', 'b@64'), 0), ('ilt', ('imax', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'),
+   (('ilt', ('imin(is_used_once)', 'a@64', 'b@64'), 0), ('ilt', ('imin', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'),
+   (('ige', ('imax(is_used_once)', 'a@64', 'b@64'), 0), ('ige', ('imax', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'),
+   (('ige', ('imin(is_used_once)', 'a@64', 'b@64'), 0), ('ige', ('imin', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'),
+   (('ilt', 'a@64', 0), ('ilt', ('unpack_64_2x32_split_y', a), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'),
+   (('ige', 'a@64', 0), ('ige', ('unpack_64_2x32_split_y', a), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'),
+
+   (('ine', 'a@64', 0), ('ine', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_y', a)), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'),
+   (('ieq', 'a@64', 0), ('ieq', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_y', a)), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'),
+   # 0u < uint(a) <=> uint(a) != 0u
+   (('ult', 0, 'a@64'), ('ine', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_y', a)), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'),
+
    # Alternative lowering that doesn't rely on bfi.
    (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
    ('bcsel', ('ult', 31, 'bits'),
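As a side note (not part of the commit), the identities these patterns rely on are easy to sanity-check in plain Python. The sketch below uses hypothetical helpers `split64` and `as_signed` to stand in for NIR's `unpack_64_2x32_split_x`/`_y` opcodes and for signed reinterpretation; it verifies that sign tests of a 64-bit value need only the high word, and that equality or inequality with zero (and `0u < uint(a)`) can be decided from the OR of the two 32-bit halves.

```python
# Illustrative sketch only: checks the identities used by the new patterns
# on random 64-bit values.  split64/as_signed are ad-hoc helpers, not NIR API.
import random

def split64(a):
    """Return the (lo, hi) 32-bit halves of a 64-bit value."""
    a &= (1 << 64) - 1
    return a & 0xffffffff, a >> 32

def as_signed(v, bits):
    """Reinterpret an unsigned value as two's-complement signed."""
    return v - (1 << bits) if v & (1 << (bits - 1)) else v

for _ in range(10000):
    a = random.getrandbits(64)
    b = random.getrandbits(64)
    lo_a, hi_a = split64(a)
    lo_b, hi_b = split64(b)
    s_a, s_b = as_signed(a, 64), as_signed(b, 64)
    sh_a, sh_b = as_signed(hi_a, 32), as_signed(hi_b, 32)

    # ilt/ige against zero only look at the sign bit, i.e. the high word.
    assert (s_a < 0) == (sh_a < 0)
    assert (s_a >= 0) == (sh_a >= 0)

    # imax/imin compared against zero reduce to imax/imin of the high words.
    assert (max(s_a, s_b) < 0) == (max(sh_a, sh_b) < 0)
    assert (min(s_a, s_b) >= 0) == (min(sh_a, sh_b) >= 0)

    # ieq/ine with zero (and 0u < uint(a)) follow from OR-ing both halves.
    assert (s_a == 0) == ((lo_a | hi_a) == 0)
    assert (s_a != 0) == ((lo_a | hi_a) != 0)
    assert (0 < a) == ((lo_a | hi_a) != 0)
```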