nir/algebraic: add new fsum ops and fdot lowering

The Mali400 pp doesn't implement fdot but has fsum3 and fsum4, which can be used to optimize fdot lowering. fsum2 is not implemented and can be further lowered to an fadd of the two vector components.

Currently lima ppir handles this lowering internally; however, this happens at a very late stage and requires a big chunk of code compared to a nir_opt_algebraic lowering. By having fsum in nir, we can reduce ppir complexity and enable the lowered ops to take part in other nir optimizations in the optimization loop.

Signed-off-by: Erico Nunes <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
author: Erico Nunes <[email protected]> 2019-07-27 17:58:53 +0200
committer: Erico Nunes <[email protected]> 2019-07-31 21:35:58 +0200
commit: 4a407df68297b434c4489e6d28c2dd18e9eec326 (patch)
tree: 7e3e490d181117d541455900483d2281f47334ad /src/compiler
parent: 7f8ff686b73b057ea38d4e88009d4543fedfa455 (diff)
3 files changed, 11 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a3c44ff988e..088b8d7d5e8 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2556,6 +2556,9 @@ typedef struct nir_shader_compiler_options {
    /* lower fdph to fdot4 */
    bool lower_fdph;
 
+   /** lower fdot to fmul and fsum/fadd. */
+   bool lower_fdot;
+
    /* Does the native fdot instruction replicate its result for four
     * components?  If so, then opt_algebraic_late will turn all fdotN
     * instructions into fdot_replicatedN instructions.
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 7854faec15f..3020da98264 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -439,6 +439,8 @@ if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4;
 if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5;
 """)
 
+# Sum of vector components
+unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}")
 
 def binop_convert(name, out_type, in_type, alg_props, const_expr):
    opcode(name, 0, out_type, [0, 0], [in_type, in_type],
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 351f467b246..9fc3be8f8b0 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -194,6 +194,12 @@ optimizations = [
    (('fdot2', ('vec2', a, 0.0), b), ('fmul', a, b)),
    (('fdot2', a, 1.0), ('fadd', 'a.x', 'a.y')),
 
+   # Lower fdot to fmul + fsum when the backend sets lower_fdot (fsum2 becomes fadd)
+   (('fdot2', a, b), ('fsum2', ('fmul', a, b)), 'options->lower_fdot'),
+   (('fdot3', a, b), ('fsum3', ('fmul', a, b)), 'options->lower_fdot'),
+   (('fdot4', a, b), ('fsum4', ('fmul', a, b)), 'options->lower_fdot'),
+   (('fsum2', a), ('fadd', 'a.x', 'a.y'), 'options->lower_fdot'),
+
    # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially
    # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1
    # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0
author	Erico Nunes <[email protected]>	2019-07-27 17:58:53 +0200
committer	Erico Nunes <[email protected]>	2019-07-31 21:35:58 +0200
commit	4a407df68297b434c4489e6d28c2dd18e9eec326 (patch)
tree	7e3e490d181117d541455900483d2281f47334ad /src/compiler
parent	7f8ff686b73b057ea38d4e88009d4543fedfa455 (diff)