nir: Add a fdot instruction that replicates the result to a vec4

Fortunately, nir_constant_expr already auto-splats if "dst" never shows up in the constant expression field so we don't need to do anything there. Reviewed-by: Connor Abbott <[email protected]> Reviewed-by: Eduardo Lima Mitev <[email protected]>
author: Jason Ekstrand <[email protected]> 2015-09-10 10:51:46 -0700
committer: Jason Ekstrand <[email protected]> 2015-09-15 12:38:48 -0700
commit: 47739c7df430664c3a998163a1e8a4a5e1901691 (patch)
tree: d6fa66bfb82d9e2e73b7ad755a7abae1eb84cadd
parent: 2458ea95c5676807a064f24ec720f12506975402 (diff)
3 files changed, 12 insertions, 0 deletions
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 3f693b17fb1..4e4543ad5ec 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1434,6 +1434,12 @@ typedef struct nir_shader_compiler_options {
    /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
    bool lower_scmp;
 
+   /* Does the native fdot instruction replicate its result for four
+    * components?  If so, then opt_algebraic_late will turn all fdotN
+    * instructions into fdot_replicatedN instructions.
+    */
+   bool fdot_replicates;
+
    /**
     * Does the driver support real 32-bit integers?  (Otherwise, integers
     * are simulated by floats.)
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index df5b7e2d517..495d109375b 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -453,6 +453,9 @@ binop("fxor", tfloat, commutative,
 binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
              "{src}")
 
+binop_reduce("fdot_replicated", 4, tfloat, tfloat,
+             "{src0} * {src1}", "{src0} + {src1}", "{src}")
+
 binop("fmin", tfloat, "", "fminf(src0, src1)")
 binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
 binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index 226e0a8d85c..acc3b04b118 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -240,6 +240,9 @@ late_optimizations = [
    (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
    (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
    (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+   (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+   (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+   (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
 ]
 
 print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
author	Jason Ekstrand <[email protected]>	2015-09-10 10:51:46 -0700
committer	Jason Ekstrand <[email protected]>	2015-09-15 12:38:48 -0700
commit	47739c7df430664c3a998163a1e8a4a5e1901691 (patch)
tree	d6fa66bfb82d9e2e73b7ad755a7abae1eb84cadd
parent	2458ea95c5676807a064f24ec720f12506975402 (diff)