diff options
author | Christoph Bumiller <[email protected]> | 2012-05-25 17:27:03 +0200 |
---|---|---|
committer | Christoph Bumiller <[email protected]> | 2012-05-29 15:01:41 +0200 |
commit | 40c224a573f2b763046001e622aafca90f68c693 (patch) | |
tree | f2959fedde5d2c3571c1e2632aecaaeac01f8c65 /src/gallium/drivers/nvc0/codegen | |
parent | 0d818cdacce0299fabe4ac2aa735247c651fdcfa (diff) |
nvc0/ir: fix texture barrier insertion to prevent WAW hazards
Fixes, for instance, object highlighting in Diablo 3 (wine).
Diffstat (limited to 'src/gallium/drivers/nvc0/codegen')
-rw-r--r-- | src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 97 |
2 files changed, 78 insertions, 21 deletions
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp index fbd1aa5dfc9..57d5d723c6a 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp @@ -960,7 +960,7 @@ void CodeEmitterNVC0::emitTEXBAR(const Instruction *i) code[0] = 0x00000006 | (i->subOp << 26); code[1] = 0xf0000000; emitPredicate(i); - emitCondCode(i->predSrc >= 0 ? i->cc : CC_ALWAYS, 5); + emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5); } void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i) diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp index 8fd4541a96a..efb51249115 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp @@ -148,7 +148,13 @@ private: }; bool insertTextureBarriers(Function *); inline bool insnDominatedBy(const Instruction *, const Instruction *) const; - void findFirstUses(const Instruction *, std::list<TexUse>&); + void findFirstUses(const Instruction *tex, const Instruction *def, + std::list<TexUse>&); + void findOverwritingDefs(const Instruction *tex, Instruction *insn, + const BasicBlock *term, + std::list<TexUse>&); + void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *); + const Instruction *recurseDef(const Instruction *); private: LValue *r63; @@ -170,37 +176,88 @@ NVC0LegalizePostRA::insnDominatedBy(const Instruction *later, } void -NVC0LegalizePostRA::findFirstUses(const Instruction *insn, +NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses, + Instruction *usei, const Instruction *insn) +{ + bool add = true; + for (std::list<TexUse>::iterator it = uses.begin(); + it != uses.end();) { + if (insnDominatedBy(usei, it->insn)) { + add = false; + break; + } + if (insnDominatedBy(it->insn, usei)) + it = uses.erase(it); + else + ++it; + } + if (add) + uses.push_back(TexUse(usei, insn)); +} + +void +NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, + Instruction *insn, + const BasicBlock *term, + std::list<TexUse> &uses) +{ + while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0))) + insn = insn->getSrc(0)->getUniqueInsn(); + + if (!insn || !insn->bb->reachableBy(texi->bb, term)) + return; + + switch (insn->op) { + /* Values not connected to the tex's definition through any of these should + * not be conflicting. + */ + case OP_SPLIT: + case OP_MERGE: + case OP_PHI: + case OP_UNION: + /* recurse again */ + for (int s = 0; insn->srcExists(s); ++s) + findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term, + uses); + break; + default: + // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ? + addTexUse(uses, insn, texi); + break; + } +} + +void +NVC0LegalizePostRA::findFirstUses(const Instruction *texi, + const Instruction *insn, std::list<TexUse> &uses) { for (int d = 0; insn->defExists(d); ++d) { Value *v = insn->getDef(d); for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) { Instruction *usei = (*u)->getInsn(); + + if (usei->op == OP_PHI || usei->op == OP_UNION) { + // need a barrier before WAW cases + for (int s = 0; usei->srcExists(s); ++s) { + Instruction *defi = usei->getSrc(s)->getUniqueInsn(); + if (defi && &usei->src(s) != *u) + findOverwritingDefs(texi, defi, usei->bb, uses); + } + } + if (usei->op == OP_SPLIT || + usei->op == OP_MERGE || usei->op == OP_PHI || usei->op == OP_UNION) { // these uses don't manifest in the machine code - findFirstUses(usei, uses); + findFirstUses(texi, usei, uses); } else if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) && usei->subOp != NV50_IR_SUBOP_MOV_FINAL) { - findFirstUses(usei, uses); + findFirstUses(texi, usei, uses); } else { - bool add = true; - for (std::list<TexUse>::iterator it = uses.begin(); - it != uses.end();) { - if (insnDominatedBy(usei, it->insn)) { - add = false; - break; - } - if (insnDominatedBy(it->insn, usei)) - it = uses.erase(it); - else - ++it; - } - if (add) - uses.push_back(TexUse(usei, insn)); + addTexUse(uses, usei, insn); } } } @@ -255,7 +312,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn) if (!uses) return false; for (size_t i = 0; i < texes.size(); ++i) - findFirstUses(texes[i], uses[i]); + findFirstUses(texes[i], texes[i], uses[i]); // determine the barrier level at each use for (size_t i = 0; i < texes.size(); ++i) { @@ -324,7 +381,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn) limitS.resize(fn->allBBlocks.getSize()); // cull unneeded barriers (should do that earlier, but for simplicity) - IteratorRef bi = fn->cfg.iteratorDFS(true); + IteratorRef bi = fn->cfg.iteratorCFG(); // first calculate min/max outstanding TEXes for each BB for (bi->reset(); !bi->end(); bi->next()) { Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); |