diff options
author | Rob Clark <[email protected]> | 2018-01-21 12:31:51 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2018-02-10 14:54:58 -0500 |
commit | 942341bcd0128fb9d9caf68b33f603855e2f6d69 (patch) | |
tree | 1522ae37ae37131f4364bbac842013216ef19236 /src/gallium | |
parent | b2fc94f0745b375b586dbd2685b1c268f4267747 (diff) |
freedreno/ir3: don't lower fsat
Instead, if possible fold (sat) flag into src, otherwise use:
(sat)max.f rD, rS, rS
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 21 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_nir.c | 1 |
3 files changed, 23 insertions, 1 deletion
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 210df20b812..84235cc1cd2 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -616,6 +616,8 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
 		break;
 	case OPC_ABSNEG_F:
 	case OPC_ABSNEG_S:
+		if (instr->flags & IR3_INSTR_SAT)
+			return false;
 		break;
 	default:
 		return false;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 009dcb30fa5..72e0f4fe288 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -964,6 +964,27 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
 	case nir_op_fmin:
 		dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0);
 		break;
+	case nir_op_fsat:
+		/* if there is just a single use of the src, and it supports
+		 * (sat) bit, we can just fold the (sat) flag back to the
+		 * src instruction and create a mov. This is easier for cp
+		 * to eliminate.
+		 *
+		 * TODO probably opc_cat==4 is ok too
+		 */
+		if (alu->src[0].src.is_ssa &&
+				(list_length(&alu->src[0].src.ssa->uses) == 1) &&
+				((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) {
+			src[0]->flags |= IR3_INSTR_SAT;
+			dst[0] = ir3_MOV(b, src[0], TYPE_U32);
+		} else {
+			/* otherwise generate a max.f that saturates.. blob does
+			 * similar (generating a cat2 mov using max.f)
+			 */
+			dst[0] = ir3_MAX_F(b, src[0], 0, src[0], 0);
+			dst[0]->flags |= IR3_INSTR_SAT;
+		}
+		break;
 	case nir_op_fmul:
 		dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0);
 		break;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 2393306e19c..81a46be914e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -37,7 +37,6 @@ static const nir_shader_compiler_options options = {
 		.lower_fpow = true,
-		.lower_fsat = true,
 		.lower_scmp = true,
 		.lower_flrp32 = true,
 		.lower_flrp64 = true,