diff options
author | Jonathan Marek <[email protected]> | 2019-06-20 21:47:16 -0400 |
---|---|---|
committer | Jonathan Marek <[email protected]> | 2019-07-24 17:36:21 -0400 |
commit | 1e089d05750cfc9555a421eaa0c44005361ebb4b (patch) | |
tree | 2e4bb2101bb385480cc1b1cdc4f7b1e5f00622d7 | |
parent | bc3b6168bac39a16326c730a9d0ae97b45c7df23 (diff) |
nir/algebraic: add option to lower fdph
For backends that don't have an 'fdph' instruction.
Signed-off-by: Jonathan Marek <[email protected]>
Reviewed-by: Thomas Helland <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/compiler/nir/nir.h | 3 | ||||
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 4 |
2 files changed, 6 insertions, 1 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 31e731baa44..f41302676d1 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2549,6 +2549,9 @@ typedef struct nir_shader_compiler_options { /** enables rules to lower fsign to fsub and flt */ bool lower_fsign; + /* lower fdph to fdot4 */ + bool lower_fdph; + /* Does the native fdot instruction replicate its result for four * components? If so, then opt_algebraic_late will turn all fdotN * instructions into fdot_replicatedN instructions. diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 951771fa37a..d57eff2fdde 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -181,7 +181,9 @@ optimizations = [ (('~fmul', ('fadd', ('iand', ('ineg', ('b2i32', 'a@bool')), ('fmul', b, c)), '#d'), '#e'), ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))), - (('fdot4', ('vec4', a, b, c, 1.0), d), ('fdph', ('vec3', a, b, c), d)), + (('fdph', a, b), ('fdot4', ('vec4', 'a.x', 'a.y', 'a.z', 1.0), b), 'options->lower_fdph'), + + (('fdot4', ('vec4', a, b, c, 1.0), d), ('fdph', ('vec3', a, b, c), d), '!options->lower_fdph'), (('fdot4', ('vec4', a, 0.0, 0.0, 0.0), b), ('fmul', a, b)), (('fdot4', ('vec4', a, b, 0.0, 0.0), c), ('fdot2', ('vec2', a, b), c)), (('fdot4', ('vec4', a, b, c, 0.0), d), ('fdot3', ('vec3', a, b, c), d)), |