diff options
author | Erico Nunes <[email protected]> | 2019-07-27 17:58:53 +0200 |
---|---|---|
committer | Erico Nunes <[email protected]> | 2019-07-31 21:35:58 +0200 |
commit | 4a407df68297b434c4489e6d28c2dd18e9eec326 (patch) | |
tree | 7e3e490d181117d541455900483d2281f47334ad /src/compiler | |
parent | 7f8ff686b73b057ea38d4e88009d4543fedfa455 (diff) |
nir/algebraic: add new fsum ops and fdot lowering
The Mali400 pp doesn't implement fdot but has fsum3 and fsum4, which can
be used to optimize fdot lowering. fsum2 is not implemented and can be
further lowered to an add with the vector components.
Currently lima ppir handles this lowering internally; however, this
happens at a very late stage and requires a big chunk of code compared
to a nir_opt_algebraic lowering.
By having fsum in nir, we can reduce ppir complexity and enable the
lowered ops to be part of other nir optimizations in the optimization
loop.
Signed-off-by: Erico Nunes <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/nir/nir.h | 3 | ||||
-rw-r--r-- | src/compiler/nir/nir_opcodes.py | 2 | ||||
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 6 |
3 files changed, 11 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a3c44ff988e..088b8d7d5e8 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2556,6 +2556,9 @@ typedef struct nir_shader_compiler_options {
    /* lower fdph to fdot4 */
    bool lower_fdph;
 
+   /** lower fdot to fmul and fsum/fadd. */
+   bool lower_fdot;
+
    /* Does the native fdot instruction replicate its result for four
     * components? If so, then opt_algebraic_late will turn all fdotN
     * instructions into fdot_replicatedN instructions.
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 7854faec15f..3020da98264 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -439,6 +439,8 @@ if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4;
 if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5;
 """)
 
+# Sum of vector components
+unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}")
 
 def binop_convert(name, out_type, in_type, alg_props, const_expr):
    opcode(name, 0, out_type, [0, 0], [in_type, in_type],
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 351f467b246..9fc3be8f8b0 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -194,6 +194,12 @@ optimizations = [
    (('fdot2', ('vec2', a, 0.0), b), ('fmul', a, b)),
    (('fdot2', a, 1.0), ('fadd', 'a.x', 'a.y')),
 
+   # Lower fdot to fsum when it is available
+   (('fdot2', a, b), ('fsum2', ('fmul', a, b)), 'options->lower_fdot'),
+   (('fdot3', a, b), ('fsum3', ('fmul', a, b)), 'options->lower_fdot'),
+   (('fdot4', a, b), ('fsum4', ('fmul', a, b)), 'options->lower_fdot'),
+   (('fsum2', a), ('fadd', 'a.x', 'a.y'), 'options->lower_fdot'),
+
    # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially
    # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1
    # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0