summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorIan Romanick <[email protected]>2018-08-18 16:53:55 -0700
committerIan Romanick <[email protected]>2019-05-06 22:52:29 -0700
commitc995d1ca3a3f14c2e6823ecdad90e7bb03e70c41 (patch)
tree598281a5007dcb5192719a7e7b1d093f54fc397f /src
parentae02622d8fd34d62aeeb0a1b6c8c71bb16a5f32d (diff)
nir/flrp: Lower flrp(a, b, #c) differently
This doesn't help on Intel GPUs now because we always take the "always_precise" path first. It may help on other GPUs, and it does prevent a bunch of regressions in "intel/compiler: Don't always require precise lowering of flrp".

Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/compiler/nir/nir_lower_flrp.c17
1 files changed, 17 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_lower_flrp.c b/src/compiler/nir/nir_lower_flrp.c
index 5094a714504..31969a61c79 100644
--- a/src/compiler/nir/nir_lower_flrp.c
+++ b/src/compiler/nir/nir_lower_flrp.c
@@ -556,6 +556,23 @@ convert_flrp_instruction(nir_builder *bld,
}
/*
+ * - If t is constant:
+ *
+ * x(1 - t) + yt
+ *
+ * The cost is three instructions without FMA or two instructions with
+ * FMA. This is the same cost as the imprecise lowering, but it gives
+ * the instruction scheduler a little more freedom.
+ *
+ * There is no need to handle t = 0.5 specially. nir_opt_algebraic
+ * already has optimizations to convert 0.5x + 0.5y to 0.5(x + y).
+ */
+ if (alu->src[2].src.ssa->parent_instr->type == nir_instr_type_load_const) {
+ replace_with_strict(bld, dead_flrp, alu);
+ return;
+ }
+
+ /*
* - Otherwise
*
* x + t(y - x)