summaryrefslogtreecommitdiffstats
path: root/src/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/nir/nir.h3
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py6
2 files changed, 9 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f41302676d1..b011f868bc2 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2537,6 +2537,9 @@ typedef struct nir_shader_compiler_options {
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
bool lower_scmp;
+ /* lower fall_equalN/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */
+ bool lower_vector_cmp;
+
/** enables rules to lower idiv by power-of-two: */
bool lower_idiv;
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 1ac1e6c64c0..e4f4a652c19 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -568,6 +568,12 @@ optimizations = [
(('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
(('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
(('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
+ (('fall_equal2', a, b), ('fmin', ('seq', 'a.x', 'b.x'), ('seq', 'a.y', 'b.y')), 'options->lower_vector_cmp'),
+ (('fall_equal3', a, b), ('seq', ('fany_nequal3', a, b), 0.0), 'options->lower_vector_cmp'),
+ (('fall_equal4', a, b), ('seq', ('fany_nequal4', a, b), 0.0), 'options->lower_vector_cmp'),
+ (('fany_nequal2', a, b), ('fmax', ('sne', 'a.x', 'b.x'), ('sne', 'a.y', 'b.y')), 'options->lower_vector_cmp'),
+ (('fany_nequal3', a, b), ('fsat', ('fdot3', ('sne', a, b), ('sne', a, b))), 'options->lower_vector_cmp'),
+ (('fany_nequal4', a, b), ('fsat', ('fdot4', ('sne', a, b), ('sne', a, b))), 'options->lower_vector_cmp'),
(('fne', ('fneg', a), a), ('fne', a, 0.0)),
(('feq', ('fneg', a), a), ('feq', a, 0.0)),
# Emulating booleans