summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/freedreno/ir3
diff options
context:
space:
mode:
author Rob Clark <[email protected]> 2018-01-21 12:31:51 -0500
committer Rob Clark <[email protected]> 2018-02-10 14:54:58 -0500
commit 942341bcd0128fb9d9caf68b33f603855e2f6d69 (patch)
tree 1522ae37ae37131f4364bbac842013216ef19236 /src/gallium/drivers/freedreno/ir3
parent b2fc94f0745b375b586dbd2685b1c268f4267747 (diff)
freedreno/ir3: don't lower fsat
Instead, if possible, fold the (sat) flag into src, otherwise use:

  (sat)max.f rD, rS, rS

Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/ir3')
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 21
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_nir.c 1
3 files changed, 23 insertions, 1 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 210df20b812..84235cc1cd2 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -616,6 +616,8 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
break;
case OPC_ABSNEG_F:
case OPC_ABSNEG_S:
+ if (instr->flags & IR3_INSTR_SAT)
+ return false;
break;
default:
return false;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 009dcb30fa5..72e0f4fe288 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -964,6 +964,27 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
case nir_op_fmin:
dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0);
break;
+ case nir_op_fsat:
+ /* if there is just a single use of the src, and it supports
+ * (sat) bit, we can just fold the (sat) flag back to the
+ * src instruction and create a mov. This is easier for cp
+ * to eliminate.
+ *
+ * TODO probably opc_cat==4 is ok too
+ */
+ if (alu->src[0].src.is_ssa &&
+ (list_length(&alu->src[0].src.ssa->uses) == 1) &&
+ ((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) {
+ src[0]->flags |= IR3_INSTR_SAT;
+ dst[0] = ir3_MOV(b, src[0], TYPE_U32);
+ } else {
+ /* otherwise generate a max.f that saturates.. blob does
+ * similar (generating a cat2 mov using max.f)
+ */
+ dst[0] = ir3_MAX_F(b, src[0], 0, src[0], 0);
+ dst[0]->flags |= IR3_INSTR_SAT;
+ }
+ break;
case nir_op_fmul:
dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0);
break;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 2393306e19c..81a46be914e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -37,7 +37,6 @@
static const nir_shader_compiler_options options = {
.lower_fpow = true,
- .lower_fsat = true,
.lower_scmp = true,
.lower_flrp32 = true,
.lower_flrp64 = true,