summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorIlia Mirkin <[email protected]>2015-04-28 03:30:08 -0400
committerIlia Mirkin <[email protected]>2015-04-28 20:17:03 -0400
commit6fe0d4f0354418c6e68dd352996e9891ddd4dfd6 (patch)
tree3ee26e183db21e11e71ccef4972b99b786df30b9 /src/gallium
parent66985d2a6de0afd9446c92dce999f00efa528d31 (diff)
nvc0/ir: flush denorms to zero in non-compute shaders
This will set the FTZ flag (flush denorms to zero) on all opcodes that can take it. This resolves issues in Unigine Heaven 4.0 where there were solid-filled boxes popping up. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89455 Cc: "10.4 10.5" <[email protected]> Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp25
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h1
2 files changed, 25 insertions, 1 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 18e8e67bc87..b61f3c49bb9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -96,6 +96,26 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
}
+void
+NVC0LegalizeSSA::handleFTZ(Instruction *i)
+{
+ // Sets the ftz flag on i; bail out unless its source type is F32.
+ if (i->sType != TYPE_F32)
+ return;
+
+ // dnz already flushes denorms (and NaNs) to zero, so ftz would be redundant.
+ if (i->dnz)
+ return;
+
+ // Only the arithmetic, compare and convert op classes get the flag.
+ OpClass cls = prog->getTarget()->getOpClass(i->op);
+ if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE &&
+ cls != OPCLASS_CONVERT)
+ return;
+
+ i->ftz = true;
+}
+
bool
NVC0LegalizeSSA::visit(Function *fn)
{
@@ -109,8 +129,11 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
Instruction *next;
for (Instruction *i = bb->getEntry(); i; i = next) {
next = i->next;
- if (i->dType == TYPE_F32)
+ if (i->dType == TYPE_F32) {
+ if (prog->getType() != Program::TYPE_COMPUTE)
+ handleFTZ(i);
continue;
+ }
switch (i->op) {
case OP_DIV:
case OP_MOD:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index d8ff5cd3d96..260e101a1fb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -36,6 +36,7 @@ private:
// we want to insert calls to the builtin library only after optimization
void handleDIV(Instruction *); // integer division, modulus
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
+ void handleFTZ(Instruction *); // set ftz (flush denorms to zero) on eligible F32 ops
private:
BuildUtil bld;