diff options
author | Jonathan Marek <[email protected]> | 2019-06-02 18:44:49 -0400 |
---|---|---|
committer | Jonathan Marek <[email protected]> | 2019-07-24 17:36:21 -0400 |
commit | 9be902097c72466f60d3178cc6ecaa578fb45d86 (patch) | |
tree | 3f23c93965fd21a2cf4a7791db46305d58fc7247 /src/compiler | |
parent | 397375d3f353ec5027cb883faf9eb831b48f76f3 (diff) |
nir/algebraic: add option to lower fall_equalN/fany_nequalN
Add generic lowerings for fall_equalN/fany_nequalN. These should be optimal
for vec4 backends that doesn't have any special instructions for it, as
long as they support saturate.
Signed-off-by: Jonathan Marek <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/nir/nir.h | 3 | ||||
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 6 |
2 files changed, 9 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f41302676d1..b011f868bc2 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2537,6 +2537,9 @@ typedef struct nir_shader_compiler_options { /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ bool lower_scmp; + /* lower fall_equalN/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */ + bool lower_vector_cmp; + /** enables rules to lower idiv by power-of-two: */ bool lower_idiv; diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 1ac1e6c64c0..e4f4a652c19 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -568,6 +568,12 @@ optimizations = [ (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'), (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'), (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'), + (('fall_equal2', a, b), ('fmin', ('seq', 'a.x', 'b.x'), ('seq', 'a.y', 'b.y')), 'options->lower_vector_cmp'), + (('fall_equal3', a, b), ('seq', ('fany_nequal3', a, b), 0.0), 'options->lower_vector_cmp'), + (('fall_equal4', a, b), ('seq', ('fany_nequal4', a, b), 0.0), 'options->lower_vector_cmp'), + (('fany_nequal2', a, b), ('fmax', ('sne', 'a.x', 'b.x'), ('sne', 'a.y', 'b.y')), 'options->lower_vector_cmp'), + (('fany_nequal3', a, b), ('fsat', ('fdot3', ('sne', a, b), ('sne', a, b))), 'options->lower_vector_cmp'), + (('fany_nequal4', a, b), ('fsat', ('fdot4', ('sne', a, b), ('sne', a, b))), 'options->lower_vector_cmp'), (('fne', ('fneg', a), a), ('fne', a, 0.0)), (('feq', ('fneg', a), a), ('feq', a, 0.0)), # Emulating booleans |