diff options
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 25 |
1 files changed, 24 insertions, 1 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 18e8e67bc87..b61f3c49bb9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -96,6 +96,26 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
    bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
 }
 
+void
+NVC0LegalizeSSA::handleFTZ(Instruction *i)
+{
+   // Only want to flush float inputs
+   if (i->sType != TYPE_F32)
+      return;
+
+   // If we're already flushing denorms (and NaN's) to zero, no need for this.
+   if (i->dnz)
+      return;
+
+   // Only certain classes of operations can flush
+   OpClass cls = prog->getTarget()->getOpClass(i->op);
+   if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE &&
+       cls != OPCLASS_CONVERT)
+      return;
+
+   i->ftz = true;
+}
+
 bool
 NVC0LegalizeSSA::visit(Function *fn)
 {
@@ -109,8 +129,11 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
    Instruction *next;
    for (Instruction *i = bb->getEntry(); i; i = next) {
       next = i->next;
-      if (i->dType == TYPE_F32)
+      if (i->dType == TYPE_F32) {
+         if (prog->getType() != Program::TYPE_COMPUTE)
+            handleFTZ(i);
          continue;
+      }
       switch (i->op) {
       case OP_DIV:
       case OP_MOD: