summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Karol Herbst <[email protected]> 2017-07-30 17:51:22 +0200
committer: Andres Gomez <[email protected]> 2017-08-19 17:38:58 +0300
commit: 8aa358bd69862d4a88148942c85ee74048fa2561 (patch)
tree: 5f53b82c940ac110013021eaed085759dedf645a
parent: 9f8925702dea4bea5557dedacef32c482566950c (diff)
nv50/ir: fix ConstantFolding with saturation
For mul(a, +-1) codegen can generate OP_MOV with a saturation flag set, which is ignored at emission. The same can happen with add(a, 0), and others.

Add an assert to detect more such issues.

Fixes wrongly rendered water in Hitman Absolution running under wine. Also a few shaders in Mad Max and Alien Isolation produce such MOVs.

CC: <[email protected]>
Signed-off-by: Karol Herbst <[email protected]>
Reviewed-by: Tobias Klausmann <[email protected]>
[imirkin: generalize the fix for other cases]
Reviewed-by: Ilia Mirkin <[email protected]>
(cherry picked from commit 24a799ad35a824fba94062f9b018f603717ed145)

squashed with:

nv50/ir: clean up saturated values immediately

Since we don't iterate to a fixed point, we can end up in situations where we have a SAT instruction + a long immediate. This is not legal. However, since it's immediately computable, just run unary straight away to handle the situation.

Fixes: 24a799ad35a82 ("nv50/ir: fix ConstantFolding with saturation")
Reported-by: Tobias Klausmann <[email protected]>
Signed-off-by: Ilia Mirkin <[email protected]>
Cc: [email protected]
(cherry picked from commit 165e18dd219fbf4d60fd582b02e2dbf75ccd026f)
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 1
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 15
2 files changed, 15 insertions, 1 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 14c00bd1870..58594f02c7f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2006,6 +2006,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
void
CodeEmitterNVC0::emitMOV(const Instruction *i)
{
+ assert(!i->saturate);
if (i->def(0).getFile() == FILE_PREDICATE) {
if (i->src(0).getFile() == FILE_GPR) {
code[0] = 0xfc01c003;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 0fbf6b8190d..68d02f870c5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -727,7 +727,9 @@ ConstantFolding::expr(Instruction *i,
// Leave PFETCH alone... we just folded its 2 args into 1.
break;
default:
- i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
+ i->op = i->saturate ? OP_SAT : OP_MOV;
+ if (i->saturate)
+ unary(i, *i->getSrc(0)->asImm());
break;
}
i->subOp = 0;
@@ -1509,6 +1511,17 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
default:
return;
}
+
+ // This can get left behind some of the optimizations which simplify
+ // saturatable values.
+ if (newi->op == OP_MOV && newi->saturate) {
+ ImmediateValue tmp;
+ newi->saturate = 0;
+ newi->op = OP_SAT;
+ if (newi->src(0).getImmediate(tmp))
+ unary(newi, tmp);
+ }
+
if (newi->op != op)
foldCount++;
}