diff options
author | Karol Herbst <[email protected]> | 2017-07-30 17:51:22 +0200 |
---|---|---|
committer | Andres Gomez <[email protected]> | 2017-08-19 17:38:58 +0300 |
commit | 8aa358bd69862d4a88148942c85ee74048fa2561 (patch) | |
tree | 5f53b82c940ac110013021eaed085759dedf645a | |
parent | 9f8925702dea4bea5557dedacef32c482566950c (diff) |
nv50/ir: fix ConstantFolding with saturation
For mul(a, +/-1), codegen can generate an OP_MOV with the saturation flag
set, which is ignored at emission. The same can happen with add(a, 0)
and other operations that fold to a plain move.
Add an assert to detect more such issues.
Fixes wrongly rendered water in Hitman Absolution running under wine.
Also a few shaders in Mad Max and Alien Isolation produce such MOVs.
CC: <[email protected]>
Signed-off-by: Karol Herbst <[email protected]>
Reviewed-by: Tobias Klausmann <[email protected]>
[imirkin: generalize the fix for other cases]
Reviewed-by: Ilia Mirkin <[email protected]>
(cherry picked from commit 24a799ad35a824fba94062f9b018f603717ed145)
squashed with:
nv50/ir: clean up saturated values immediately
Since we don't iterate to a fixed point, we can end up in situations
where we have a SAT instruction with a long immediate. This is not legal.
However, since the result is immediately computable, just run unary()
straight away to handle the situation.
Fixes: 24a799ad35a82 ("nv50/ir: fix ConstantFolding with saturation")
Reported-by: Tobias Klausmann <[email protected]>
Signed-off-by: Ilia Mirkin <[email protected]>
Cc: [email protected]
(cherry picked from commit 165e18dd219fbf4d60fd582b02e2dbf75ccd026f)
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 15 |
2 files changed, 15 insertions, 1 deletion
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 14c00bd1870..58594f02c7f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2006,6 +2006,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
 void
 CodeEmitterNVC0::emitMOV(const Instruction *i)
 {
+   assert(!i->saturate);
    if (i->def(0).getFile() == FILE_PREDICATE) {
       if (i->src(0).getFile() == FILE_GPR) {
          code[0] = 0xfc01c003;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 0fbf6b8190d..68d02f870c5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -727,7 +727,9 @@ ConstantFolding::expr(Instruction *i,
       // Leave PFETCH alone... we just folded its 2 args into 1.
       break;
    default:
-      i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
+      i->op = i->saturate ? OP_SAT : OP_MOV;
+      if (i->saturate)
+         unary(i, *i->getSrc(0)->asImm());
       break;
    }
    i->subOp = 0;
@@ -1509,6 +1511,17 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
    default:
       return;
    }
+
+   // This can get left behind some of the optimizations which simplify
+   // saturatable values.
+   if (newi->op == OP_MOV && newi->saturate) {
+      ImmediateValue tmp;
+      newi->saturate = 0;
+      newi->op = OP_SAT;
+      if (newi->src(0).getImmediate(tmp))
+         unary(newi, tmp);
+   }
+
    if (newi->op != op)
       foldCount++;
 }