aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2012-05-25 17:27:03 +0200
committerChristoph Bumiller <[email protected]>2012-05-29 15:01:41 +0200
commit40c224a573f2b763046001e622aafca90f68c693 (patch)
treef2959fedde5d2c3571c1e2632aecaaeac01f8c65
parent0d818cdacce0299fabe4ac2aa735247c651fdcfa (diff)
nvc0/ir: fix texture barrier insertion to prevent WAW hazards
Fixes, for instance, object highlighting in Diablo 3 (wine).
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir.h2
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp12
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_graph.h2
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h2
-rw-r--r--src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp2
-rw-r--r--src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp97
6 files changed, 88 insertions, 29 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index 9b47e3e13c1..0b47c32527f 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -864,7 +864,7 @@ public:
inline bool isTerminated() const { return exit && exit->terminator; }
bool dominatedBy(BasicBlock *bb);
- inline bool reachableBy(BasicBlock *by, BasicBlock *term);
+ inline bool reachableBy(const BasicBlock *by, const BasicBlock *term);
// returns mask of conditional out blocks
// e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp
index f1bff973636..33e35eea950 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp
@@ -23,6 +23,7 @@
#include "nv50_ir_graph.h"
#include <limits>
#include <list>
+#include <stack>
#include "nv50_ir.h"
namespace nv50_ir {
@@ -165,16 +166,17 @@ Graph::Edge::Edge(Node *org, Node *tgt, Type kind)
}
bool
-Graph::Node::reachableBy(Node *node, Node *term)
+Graph::Node::reachableBy(const Node *node, const Node *term) const
{
- Stack stack;
- Node *pos;
+ std::stack<const Node *> stack;
+ const Node *pos = NULL;
const int seq = graph->nextSequence();
stack.push(node);
- while (stack.getSize()) {
- pos = reinterpret_cast<Node *>(stack.pop().u.p);
+ while (!stack.empty()) {
+ pos = stack.top();
+ stack.pop();
if (pos == this)
return true;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h
index 9ef317f943c..3bf84ba1e36 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h
@@ -117,7 +117,7 @@ public:
inline Node *parent() const; // returns NULL if count(incident edges) != 1
- bool reachableBy(Node *node, Node *term);
+ bool reachableBy(const Node *node, const Node *term) const;
inline bool visit(int);
inline int getSequence() const;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
index b62431f1e31..ab4c98fbcd7 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
@@ -359,7 +359,7 @@ Value *Value::get(Iterator &it)
return reinterpret_cast<Value *>(it.get());
}
-bool BasicBlock::reachableBy(BasicBlock *by, BasicBlock *term)
+bool BasicBlock::reachableBy(const BasicBlock *by, const BasicBlock *term)
{
return cfg.reachableBy(&by->cfg, &term->cfg);
}
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
index fbd1aa5dfc9..57d5d723c6a 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
@@ -960,7 +960,7 @@ void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
code[0] = 0x00000006 | (i->subOp << 26);
code[1] = 0xf0000000;
emitPredicate(i);
- emitCondCode(i->predSrc >= 0 ? i->cc : CC_ALWAYS, 5);
+ emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
}
void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
index 8fd4541a96a..efb51249115 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -148,7 +148,13 @@ private:
};
bool insertTextureBarriers(Function *);
inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
- void findFirstUses(const Instruction *, std::list<TexUse>&);
+ void findFirstUses(const Instruction *tex, const Instruction *def,
+ std::list<TexUse>&);
+ void findOverwritingDefs(const Instruction *tex, Instruction *insn,
+ const BasicBlock *term,
+ std::list<TexUse>&);
+ void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *);
+ const Instruction *recurseDef(const Instruction *);
private:
LValue *r63;
@@ -170,37 +176,88 @@ NVC0LegalizePostRA::insnDominatedBy(const Instruction *later,
}
void
-NVC0LegalizePostRA::findFirstUses(const Instruction *insn,
+NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
+ Instruction *usei, const Instruction *insn)
+{
+ bool add = true;
+ for (std::list<TexUse>::iterator it = uses.begin();
+ it != uses.end();) {
+ if (insnDominatedBy(usei, it->insn)) {
+ add = false;
+ break;
+ }
+ if (insnDominatedBy(it->insn, usei))
+ it = uses.erase(it);
+ else
+ ++it;
+ }
+ if (add)
+ uses.push_back(TexUse(usei, insn));
+}
+
+void
+NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
+ Instruction *insn,
+ const BasicBlock *term,
+ std::list<TexUse> &uses)
+{
+ while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0)))
+ insn = insn->getSrc(0)->getUniqueInsn();
+
+ if (!insn || !insn->bb->reachableBy(texi->bb, term))
+ return;
+
+ switch (insn->op) {
+ /* Values not connected to the tex's definition through any of these should
+ * not be conflicting.
+ */
+ case OP_SPLIT:
+ case OP_MERGE:
+ case OP_PHI:
+ case OP_UNION:
+ /* recurse again */
+ for (int s = 0; insn->srcExists(s); ++s)
+ findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term,
+ uses);
+ break;
+ default:
+ // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ?
+ addTexUse(uses, insn, texi);
+ break;
+ }
+}
+
+void
+NVC0LegalizePostRA::findFirstUses(const Instruction *texi,
+ const Instruction *insn,
std::list<TexUse> &uses)
{
for (int d = 0; insn->defExists(d); ++d) {
Value *v = insn->getDef(d);
for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
Instruction *usei = (*u)->getInsn();
+
+ if (usei->op == OP_PHI || usei->op == OP_UNION) {
+ // need a barrier before WAW cases
+ for (int s = 0; usei->srcExists(s); ++s) {
+ Instruction *defi = usei->getSrc(s)->getUniqueInsn();
+ if (defi && &usei->src(s) != *u)
+ findOverwritingDefs(texi, defi, usei->bb, uses);
+ }
+ }
+
if (usei->op == OP_SPLIT ||
+ usei->op == OP_MERGE ||
usei->op == OP_PHI ||
usei->op == OP_UNION) {
// these uses don't manifest in the machine code
- findFirstUses(usei, uses);
+ findFirstUses(texi, usei, uses);
} else
if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) &&
usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
- findFirstUses(usei, uses);
+ findFirstUses(texi, usei, uses);
} else {
- bool add = true;
- for (std::list<TexUse>::iterator it = uses.begin();
- it != uses.end();) {
- if (insnDominatedBy(usei, it->insn)) {
- add = false;
- break;
- }
- if (insnDominatedBy(it->insn, usei))
- it = uses.erase(it);
- else
- ++it;
- }
- if (add)
- uses.push_back(TexUse(usei, insn));
+ addTexUse(uses, usei, insn);
}
}
}
@@ -255,7 +312,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
if (!uses)
return false;
for (size_t i = 0; i < texes.size(); ++i)
- findFirstUses(texes[i], uses[i]);
+ findFirstUses(texes[i], texes[i], uses[i]);
// determine the barrier level at each use
for (size_t i = 0; i < texes.size(); ++i) {
@@ -324,7 +381,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
limitS.resize(fn->allBBlocks.getSize());
// cull unneeded barriers (should do that earlier, but for simplicity)
- IteratorRef bi = fn->cfg.iteratorDFS(true);
+ IteratorRef bi = fn->cfg.iteratorCFG();
// first calculate min/max outstanding TEXes for each BB
for (bi->reset(); !bi->end(); bi->next()) {
Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get());