diff options
author | Ilia Mirkin <[email protected]> | 2015-04-28 03:30:08 -0400 |
---|---|---|
committer | Emil Velikov <[email protected]> | 2015-05-05 14:58:39 +0100 |
commit | dfe88d4f5288a39afde942b3a4c91637a79c924c (patch) | |
tree | f96b0af87d1e0229c99aba7d6c088c02215bed43 | |
parent | 1176e5862ac656f1c1a790ea5b49ced4fcb3f480 (diff) |
nvc0/ir: flush denorms to zero in non-compute shaders
This will set the FTZ flag (flush denorms to zero) on all opcodes that
can take it.
This resolves issues in Unigine Heaven 4.0 where there were solid-filled
boxes popping up.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89455
Cc: "10.4 10.5" <[email protected]>
Signed-off-by: Ilia Mirkin <[email protected]>
(cherry picked from commit 6fe0d4f0354418c6e68dd352996e9891ddd4dfd6)
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 25 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 |
2 files changed, 25 insertions, 1 deletions
```diff
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index c2341317b1a..4000812f8b1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -73,6 +73,26 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
    // TODO
 }
 
+void
+NVC0LegalizeSSA::handleFTZ(Instruction *i)
+{
+   // Only want to flush float inputs
+   if (i->sType != TYPE_F32)
+      return;
+
+   // If we're already flushing denorms (and NaN's) to zero, no need for this.
+   if (i->dnz)
+      return;
+
+   // Only certain classes of operations can flush
+   OpClass cls = prog->getTarget()->getOpClass(i->op);
+   if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE &&
+       cls != OPCLASS_CONVERT)
+      return;
+
+   i->ftz = true;
+}
+
 bool
 NVC0LegalizeSSA::visit(Function *fn)
 {
@@ -86,8 +106,11 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
    Instruction *next;
    for (Instruction *i = bb->getEntry(); i; i = next) {
       next = i->next;
-      if (i->dType == TYPE_F32)
+      if (i->dType == TYPE_F32) {
+         if (prog->getType() != Program::TYPE_COMPUTE)
+            handleFTZ(i);
          continue;
+      }
       switch (i->op) {
       case OP_DIV:
       case OP_MOD:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index d8ff5cd3d96..260e101a1fb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -36,6 +36,7 @@ private:
    // we want to insert calls to the builtin library only after optimization
    void handleDIV(Instruction *); // integer division, modulus
    void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
+   void handleFTZ(Instruction *);
 
 private:
    BuildUtil bld;
```