summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nouveau
diff options
context:
space:
mode:
authorSamuel Pitoiset <[email protected]>2016-01-26 01:32:23 +0100
committerSamuel Pitoiset <[email protected]>2016-02-21 10:42:32 +0100
commit14a810e9d0dbf52e547ae6e16a68487d7cb92004 (patch)
tree84968877f45846ab619a89b2a49c4a201ec88a1c /src/gallium/drivers/nouveau
parente0371e63df173be38a086a5d816eeaf0e8cbe7eb (diff)
nv50/ir: add atomics support on shared memory for Fermi
Changes from v3: - move the previous OP_SELP change to the previous commit Changes from v2: - make sure the op is OP_SELP when emitting the predicate and add one assert - use bld.getSSA() for mkOp2() - add cross edge between tryLockAndSetBB and joinBB Signed-off-by: Samuel Pitoiset <[email protected]> Acked-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp103
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h1
2 files changed, 102 insertions, 2 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index e7cb54bc426..21a6f1eebf6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1033,6 +1033,100 @@ NVC0LoweringPass::handleSUQ(Instruction *suq)
return true;
}
+void
+NVC0LoweringPass::handleSharedATOM(Instruction *atom)
+{
+ assert(atom->src(0).getFile() == FILE_MEMORY_SHARED);
+
+ BasicBlock *currBB = atom->bb;
+ BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false);
+ BasicBlock *joinBB = atom->bb->splitAfter(atom);
+
+ bld.setPosition(currBB, true);
+ assert(!currBB->joinAt);
+ currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
+
+ bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL);
+ currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE);
+
+ bld.setPosition(tryLockAndSetBB, true);
+
+ Instruction *ld =
+ bld.mkLoad(TYPE_U32, atom->getDef(0),
+ bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL);
+ ld->setDef(1, bld.getSSA(1, FILE_PREDICATE));
+ ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED;
+
+ Value *stVal;
+ if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
+ // Read the old value, and write the new one.
+ stVal = atom->getSrc(1);
+ } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ CmpInstruction *set =
+ bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+ TYPE_U32, ld->getDef(0), atom->getSrc(1));
+ set->setPredicate(CC_P, ld->getDef(1));
+
+ CmpInstruction *selp =
+ bld.mkCmp(OP_SELP, CC_NOT_P, TYPE_U32, bld.getSSA(4, FILE_ADDRESS),
+ TYPE_U32, ld->getDef(0), atom->getSrc(2),
+ set->getDef(0));
+ selp->setPredicate(CC_P, ld->getDef(1));
+
+ stVal = selp->getDef(0);
+ } else {
+ operation op;
+
+ switch (atom->subOp) {
+ case NV50_IR_SUBOP_ATOM_ADD:
+ op = OP_ADD;
+ break;
+ case NV50_IR_SUBOP_ATOM_AND:
+ op = OP_AND;
+ break;
+ case NV50_IR_SUBOP_ATOM_OR:
+ op = OP_OR;
+ break;
+ case NV50_IR_SUBOP_ATOM_XOR:
+ op = OP_XOR;
+ break;
+ case NV50_IR_SUBOP_ATOM_MIN:
+ op = OP_MIN;
+ break;
+ case NV50_IR_SUBOP_ATOM_MAX:
+ op = OP_MAX;
+ break;
+ default:
+ assert(0);
+ }
+
+ Instruction *i =
+ bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0),
+ atom->getSrc(1));
+ i->setPredicate(CC_P, ld->getDef(1));
+
+ stVal = i->getDef(0);
+ }
+
+ Instruction *st =
+ bld.mkStore(OP_STORE, TYPE_U32,
+ bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0),
+ NULL, stVal);
+ st->setPredicate(CC_P, ld->getDef(1));
+ st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED;
+
+ // Loop until the lock is acquired.
+ bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1));
+ tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK);
+ tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS);
+ bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL);
+
+ bld.remove(atom);
+
+ bld.setPosition(joinBB, false);
+ bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
+}
+
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
@@ -1044,8 +1138,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
sv = SV_LBASE;
break;
case FILE_MEMORY_SHARED:
- sv = SV_SBASE;
- break;
+ handleSharedATOM(atom);
+ return true;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
@@ -1072,6 +1166,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
bool
NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
{
+ if (cas->src(0).getFile() == FILE_MEMORY_SHARED) {
+ // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM().
+ return false;
+ }
+
if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS &&
cas->subOp != NV50_IR_SUBOP_ATOM_EXCH)
return false;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 09ec7e69ddc..6eb8aff3036 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -105,6 +105,7 @@ protected:
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
+ void handleSharedATOM(Instruction *);
void checkPredicate(Instruction *);