aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2012-04-29 17:56:57 +0200
committerChristoph Bumiller <[email protected]>2012-04-29 17:56:57 +0200
commit00fe442253744c4c4e7e68da44d6983da053968b (patch)
treea2220fceb8ffa22edd97b4d725b4e3ee231a19d0
parent163b290f886c69a233c71799613eb74fb2668085 (diff)
nvc0/ir: implement better placement of texture barriers
Put them before first uses instead of right after the texturing instruction and cull unnecessary barriers.
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir.h3
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp46
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_graph.h4
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp1
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp7
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp2
-rw-r--r--src/gallium/drivers/nv50/codegen/nv50_ir_util.h1
-rw-r--r--src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp276
8 files changed, 327 insertions, 13 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index c299cab3f52..da9042066ad 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -135,6 +135,8 @@ enum operation
OP_LAST
};
+// various instruction-specific modifier definitions Instruction::subOp
+// MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs)
#define NV50_IR_SUBOP_MUL_HIGH 1
#define NV50_IR_SUBOP_EMIT_RESTART 1
#define NV50_IR_SUBOP_LDC_IL 1
@@ -143,6 +145,7 @@ enum operation
#define NV50_IR_SUBOP_SHIFT_WRAP 1
#define NV50_IR_SUBOP_EMU_PRERET 1
#define NV50_IR_SUBOP_TEXBAR(n) n
+#define NV50_IR_SUBOP_MOV_FINAL 1
enum DataType
{
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp
index 90147b6bfb8..f1bff973636 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp
@@ -21,6 +21,9 @@
*/
#include "nv50_ir_graph.h"
+#include <limits>
+#include <list>
+#include "nv50_ir.h"
namespace nv50_ir {
@@ -228,8 +231,7 @@ public:
virtual bool end() const { return pos >= count; }
virtual void next() { if (pos < count) ++pos; }
virtual void *get() const { return nodes[pos]; }
-
- void reset() { pos = 0; }
+ virtual void reset() { pos = 0; }
protected:
Graph::Node **nodes;
@@ -274,6 +276,7 @@ public:
virtual void *get() const { return nodes[pos]; }
virtual bool end() const { return pos >= count; }
virtual void next() { if (pos < count) ++pos; }
+ virtual void reset() { pos = 0; }
private:
void search(Graph::Node *node, const int sequence)
@@ -389,4 +392,43 @@ void Graph::classifyDFS(Node *curr, int& seq)
curr->tag = 0;
}
+// @dist is indexed by Node::tag, returns -1 if no path found
+int
+Graph::findLightestPathWeight(Node *a, Node *b, const std::vector<int> &weight)
+{
+ std::vector<int> path(weight.size(), std::numeric_limits<int>::max());
+ std::list<Node *> nodeList;
+ const int seq = nextSequence();
+
+ path[a->tag] = 0;
+ for (Node *c = a; c && c != b;) {
+ const int p = path[c->tag] + weight[c->tag];
+ for (EdgeIterator ei = c->outgoing(); !ei.end(); ei.next()) {
+ Node *t = ei.getNode();
+ if (t->getSequence() < seq) {
+ if (path[t->tag] == std::numeric_limits<int>::max())
+ nodeList.push_front(t);
+ if (p < path[t->tag])
+ path[t->tag] = p;
+ }
+ }
+ c->visit(seq);
+ Node *next = NULL;
+ for (std::list<Node *>::iterator n = nodeList.begin();
+ n != nodeList.end(); ++n) {
+ if (!next || path[(*n)->tag] < path[next->tag])
+ next = *n;
+ if ((*n) == c) {
+ // erase visited
+ n = nodeList.erase(n);
+ --n;
+ }
+ }
+ c = next;
+ }
+ if (path[b->tag] == std::numeric_limits<int>::max())
+ return -1;
+ return path[b->tag];
+}
+
} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h
index cfa73c3dd1a..9ef317f943c 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h
@@ -24,6 +24,7 @@
#define __NV50_IR_GRAPH_H__
#include "nv50_ir_util.h"
+#include <vector>
namespace nv50_ir {
@@ -165,6 +166,9 @@ public:
void classifyEdges();
+ // @weights: indexed by Node::tag
+ int findLightestPathWeight(Node *, Node *, const std::vector<int>& weights);
+
private:
void classifyDFS(Node *, int&);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
index 30d8acee3bc..27373b4cc47 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
@@ -998,6 +998,7 @@ NV50LoweringPreSSA::handleEXPORT(Instruction *i)
int id = i->getSrc(0)->reg.data.offset / 4; // in 32 bit reg units
i->op = OP_MOV;
+ i->subOp = NV50_IR_SUBOP_MOV_FINAL;
i->src(0).set(i->src(1));
i->setSrc(1, NULL);
i->setDef(0, new_LValue(func, FILE_GPR));
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
index 7d0ebc69710..5bc3a450779 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
@@ -33,7 +33,7 @@ namespace nv50_ir {
bool
Instruction::isNop() const
{
- if (op == OP_CONSTRAINT || op == OP_PHI)
+ if (op == OP_PHI || op == OP_SPLIT || op == OP_MERGE || op == OP_CONSTRAINT)
return true;
if (terminator || join) // XXX: should terminator imply flow ?
return false;
@@ -1855,7 +1855,10 @@ FlatteningPass::visit(BasicBlock *bb)
Instruction *insn = bb->getExit();
if (insn && insn->op == OP_JOIN && !insn->getPredicate()) {
insn = insn->prev;
- if (insn && !insn->getPredicate() && !insn->asFlow() && !insn->isNop()) {
+ if (insn && !insn->getPredicate() &&
+ !insn->asFlow() &&
+ insn->op != OP_TEXBAR &&
+ !insn->isNop()) {
insn->join = 1;
bb->remove(bb->getExit());
return true;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp
index 5d2aa587a31..77edaa6067a 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp
@@ -1615,7 +1615,6 @@ GCRA::resolveSplitsAndMerges()
v->join = v;
reg += v->reg.size;
}
- delete_Instruction(prog, split);
}
splits.clear();
@@ -1630,7 +1629,6 @@ GCRA::resolveSplitsAndMerges()
v->join = v;
reg += v->reg.size;
}
- delete_Instruction(prog, merge);
}
merges.clear();
}
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_util.h
index f348f1811b8..a0020789c11 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_util.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_util.h
@@ -91,6 +91,7 @@ public:
virtual void next() = 0;
virtual void *get() const = 0;
virtual bool end() const = 0; // if true, get will return 0
+ virtual void reset() { assert(0); } // only for graph iterators
};
typedef std::auto_ptr<Iterator> IteratorRef;
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
index 318d345efdb..02ae9fd5d0e 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -25,6 +25,8 @@
#include "nv50_ir_target_nvc0.h"
+#include <limits>
+
namespace nv50_ir {
#define QOP_ADD 0
@@ -129,8 +131,26 @@ private:
bool tryReplaceContWithBra(BasicBlock *);
void propagateJoin(BasicBlock *);
- LValue *r63;
+ struct TexUse
+ {
+ TexUse(Instruction *use, const Instruction *tex)
+ : insn(use), tex(tex), level(-1) { }
+ Instruction *insn;
+ const Instruction *tex; // or split / mov
+ int level;
+ };
+ struct Limits
+ {
+ Limits() { }
+ Limits(int min, int max) : min(min), max(max) { }
+ int min, max;
+ };
+ bool insertTextureBarriers(Function *);
+ inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
+ void findFirstUses(const Instruction *, std::list<TexUse>&);
+private:
+ LValue *r63;
const bool needTexBar;
};
@@ -140,8 +160,255 @@ NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog)
}
bool
+NVC0LegalizePostRA::insnDominatedBy(const Instruction *later,
+ const Instruction *early) const
+{
+ if (early->bb == later->bb)
+ return early->serial < later->serial;
+ return later->bb->dominatedBy(early->bb);
+}
+
+void
+NVC0LegalizePostRA::findFirstUses(const Instruction *insn,
+ std::list<TexUse> &uses)
+{
+ for (int d = 0; insn->defExists(d); ++d) {
+ Value *v = insn->getDef(d);
+ for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
+ Instruction *usei = (*u)->getInsn();
+ if (usei->op == OP_SPLIT ||
+ usei->op == OP_PHI ||
+ usei->op == OP_UNION) {
+ // these uses don't manifest in the machine code
+ findFirstUses(usei, uses);
+ } else
+ if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) &&
+ usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
+ findFirstUses(usei, uses);
+ } else {
+ bool add = true;
+ for (std::list<TexUse>::iterator it = uses.begin();
+ it != uses.end();) {
+ if (insnDominatedBy(usei, it->insn)) {
+ add = false;
+ break;
+ }
+ if (insnDominatedBy(it->insn, usei))
+ it = uses.erase(it);
+ else
+ ++it;
+ }
+ if (add)
+ uses.push_back(TexUse(usei, insn));
+ }
+ }
+ }
+}
+
+// Texture barriers:
+// This pass is a bit long and ugly and can probably be optimized.
+//
+// 1. obtain a list of TEXes and their outputs' first use(s)
+// 2. calculate the barrier level of each first use (minimal number of TEXes,
+// over all paths, between the TEX and the use in question)
+// 3. for each barrier, if all paths from the source TEX to that barrier
+// contain a barrier of lesser level, it can be culled
+bool
+NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
+{
+ std::list<TexUse> *uses;
+ std::vector<Instruction *> texes;
+ std::vector<int> bbFirstTex;
+ std::vector<int> bbFirstUse;
+ std::vector<int> texCounts;
+ std::vector<TexUse> useVec;
+ ArrayList insns;
+
+ fn->orderInstructions(insns);
+
+ texCounts.resize(fn->allBBlocks.getSize(), 0);
+ bbFirstTex.resize(fn->allBBlocks.getSize(), insns.getSize());
+ bbFirstUse.resize(fn->allBBlocks.getSize(), insns.getSize());
+
+ // tag BB CFG nodes by their id for later
+ for (ArrayList::Iterator i = fn->allBBlocks.iterator(); !i.end(); i.next()) {
+ BasicBlock *bb = reinterpret_cast<BasicBlock *>(i.get());
+ if (bb)
+ bb->cfg.tag = bb->getId();
+ }
+
+ // gather the first uses for each TEX
+ for (int i = 0; i < insns.getSize(); ++i) {
+ Instruction *tex = reinterpret_cast<Instruction *>(insns.get(i));
+ if (isTextureOp(tex->op)) {
+ texes.push_back(tex);
+ if (!texCounts.at(tex->bb->getId()))
+ bbFirstTex[tex->bb->getId()] = texes.size() - 1;
+ texCounts[tex->bb->getId()]++;
+ }
+ }
+ insns.clear();
+ if (texes.empty())
+ return false;
+ uses = new std::list<TexUse>[texes.size()];
+ if (!uses)
+ return false;
+ for (size_t i = 0; i < texes.size(); ++i)
+ findFirstUses(texes[i], uses[i]);
+
+ // determine the barrier level at each use
+ for (size_t i = 0; i < texes.size(); ++i) {
+ for (std::list<TexUse>::iterator u = uses[i].begin(); u != uses[i].end();
+ ++u) {
+ BasicBlock *tb = texes[i]->bb;
+ BasicBlock *ub = u->insn->bb;
+ if (tb == ub) {
+ u->level = 0;
+ for (size_t j = i + 1; j < texes.size() &&
+ texes[j]->bb == tb && texes[j]->serial < u->insn->serial;
+ ++j)
+ u->level++;
+ } else {
+ u->level = fn->cfg.findLightestPathWeight(&tb->cfg,
+ &ub->cfg, texCounts);
+ if (u->level < 0) {
+ WARN("Failed to find path TEX -> TEXBAR\n");
+ u->level = 0;
+ continue;
+ }
+ // this counted all TEXes in the origin block, correct that
+ u->level -= i - bbFirstTex.at(tb->getId()) + 1 /* this TEX */;
+ // and did not count the TEXes in the destination block, add those
+ for (size_t j = bbFirstTex.at(ub->getId()); j < texes.size() &&
+ texes[j]->bb == ub && texes[j]->serial < u->insn->serial;
+ ++j)
+ u->level++;
+ }
+ assert(u->level >= 0);
+ useVec.push_back(*u);
+ }
+ }
+ delete[] uses;
+ uses = NULL;
+
+ // insert the barriers
+ for (size_t i = 0; i < useVec.size(); ++i) {
+ Instruction *prev = useVec[i].insn->prev;
+ if (useVec[i].level < 0)
+ continue;
+ if (prev && prev->op == OP_TEXBAR) {
+ if (prev->subOp > useVec[i].level)
+ prev->subOp = useVec[i].level;
+ prev->setSrc(prev->srcCount(), useVec[i].tex->getDef(0));
+ } else {
+ Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE);
+ bar->fixed = 1;
+ bar->subOp = useVec[i].level;
+ // make use explicit to ease latency calculation
+ bar->setSrc(bar->srcCount(), useVec[i].tex->getDef(0));
+ useVec[i].insn->bb->insertBefore(useVec[i].insn, bar);
+ }
+ }
+
+ if (fn->getProgram()->optLevel < 3) {
+ if (uses)
+ delete[] uses;
+ return true;
+ }
+
+ std::vector<Limits> limitT, limitB, limitS; // entry, exit, single
+
+ limitT.resize(fn->allBBlocks.getSize(), Limits(0, 0));
+ limitB.resize(fn->allBBlocks.getSize(), Limits(0, 0));
+ limitS.resize(fn->allBBlocks.getSize());
+
+ // cull unneeded barriers (should do that earlier, but for simplicity)
+ IteratorRef bi = fn->cfg.iteratorDFS(true);
+ // first calculate min/max outstanding TEXes for each BB
+ for (bi->reset(); !bi->end(); bi->next()) {
+ Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get());
+ BasicBlock *bb = BasicBlock::get(n);
+ int min = 0;
+ int max = std::numeric_limits<int>::max();
+ for (Instruction *i = bb->getFirst(); i; i = i->next) {
+ if (isTextureOp(i->op)) {
+ min++;
+ if (max < std::numeric_limits<int>::max())
+ max++;
+ } else
+ if (i->op == OP_TEXBAR) {
+ min = MIN2(min, i->subOp);
+ max = MIN2(max, i->subOp);
+ }
+ }
+ // limits when looking at an isolated block
+ limitS[bb->getId()].min = min;
+ limitS[bb->getId()].max = max;
+ }
+ // propagate the min/max values
+ for (unsigned int l = 0; l <= fn->loopNestingBound; ++l) {
+ for (bi->reset(); !bi->end(); bi->next()) {
+ Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get());
+ BasicBlock *bb = BasicBlock::get(n);
+ const int bbId = bb->getId();
+ for (Graph::EdgeIterator ei = n->incident(); !ei.end(); ei.next()) {
+ BasicBlock *in = BasicBlock::get(ei.getNode());
+ const int inId = in->getId();
+ limitT[bbId].min = MAX2(limitT[bbId].min, limitB[inId].min);
+ limitT[bbId].max = MAX2(limitT[bbId].max, limitB[inId].max);
+ }
+ // I just hope this is correct ...
+ if (limitS[bbId].max == std::numeric_limits<int>::max()) {
+ // no barrier
+ limitB[bbId].min = limitT[bbId].min + limitS[bbId].min;
+ limitB[bbId].max = limitT[bbId].max + limitS[bbId].min;
+ } else {
+ // block contained a barrier
+ limitB[bbId].min = MIN2(limitS[bbId].max,
+ limitT[bbId].min + limitS[bbId].min);
+ limitB[bbId].max = MIN2(limitS[bbId].max,
+ limitT[bbId].max + limitS[bbId].min);
+ }
+ }
+ }
+ // finally delete unnecessary barriers
+ for (bi->reset(); !bi->end(); bi->next()) {
+ Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get());
+ BasicBlock *bb = BasicBlock::get(n);
+ Instruction *prev = NULL;
+ Instruction *next;
+ int max = limitT[bb->getId()].max;
+ for (Instruction *i = bb->getFirst(); i; i = next) {
+ next = i->next;
+ if (i->op == OP_TEXBAR) {
+ if (i->subOp >= max) {
+ delete_Instruction(prog, i);
+ } else {
+ max = i->subOp;
+ if (prev && prev->op == OP_TEXBAR && prev->subOp >= max) {
+ delete_Instruction(prog, prev);
+ prev = NULL;
+ }
+ }
+ } else
+ if (isTextureOp(i->op)) {
+ max++;
+ }
+ if (!i->isNop())
+ prev = i;
+ }
+ }
+ if (uses)
+ delete[] uses;
+ return true;
+}
+
+bool
NVC0LegalizePostRA::visit(Function *fn)
{
+ if (needTexBar)
+ insertTextureBarriers(fn);
+
r63 = new_LValue(fn, FILE_GPR);
r63->reg.data.id = 63;
return true;
@@ -235,12 +502,6 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
} else
if (i->isNop()) {
bb->remove(i);
- } else
- if (needTexBar && isTextureOp(i->op)) {
- Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE);
- bar->fixed = 1;
- bar->subOp = 0;
- bb->insertAfter(i, bar);
} else {
if (i->op != OP_MOV && i->op != OP_PFETCH)
replaceZero(i);
@@ -673,6 +934,7 @@ NVC0LoweringPass::handleEXPORT(Instruction *i)
if (i->src(0).isIndirect(0)) // TODO, ugly
return false;
i->op = OP_MOV;
+ i->subOp = NV50_IR_SUBOP_MOV_FINAL;
i->src(0).set(i->src(1));
i->setSrc(1, NULL);
i->setDef(0, new_LValue(func, FILE_GPR));