summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorIlia Mirkin <[email protected]>2016-01-18 23:34:01 -0500
committerIlia Mirkin <[email protected]>2016-01-29 21:22:48 -0500
commit7b9a77b905bda3003dc57efb99879499ebc4ba41 (patch)
tree889d43035d7033580f199b21dc8173828b15807c /src/gallium/drivers
parent2c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23 (diff)
nv50/ir: add support for indirect buffer loading
Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp19
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp22
2 files changed, 31 insertions, 10 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index f5448293e54..735e2891cf2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -2253,7 +2253,10 @@ Converter::handleLOAD(Value *dst0[4])
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
}
- mkLoad(TYPE_U32, dst0[c], sym, off)->cache = tgsi.getCacheMode();
+ Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
+ ld->cache = tgsi.getCacheMode();
+ if (tgsi.getSrc(0).isIndirect(0))
+ ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
}
return;
}
@@ -2350,8 +2353,10 @@ Converter::handleSTORE()
sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
}
- mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c))
- ->cache = tgsi.getCacheMode();
+ Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
+ st->cache = tgsi.getCacheMode();
+ if (tgsi.getDst(0).isIndirect(0))
+ st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
}
return;
}
@@ -2432,6 +2437,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
insn->setIndirect(0, 0, off);
+ if (tgsi.getSrc(0).isIndirect(0))
+ insn->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
insn->subOp = subOp;
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
insn->setSrc(2, fetchSrc(3, 0));
@@ -3200,8 +3207,10 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
case TGSI_OPCODE_RESQ:
- mkOp1(OP_SUQ, TYPE_U32, dst0[0],
- makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0));
+ geni = mkOp1(OP_SUQ, TYPE_U32, dst0[0],
+ makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0));
+ if (tgsi.getSrc(0).isIndirect(0))
+ geni->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
break;
case TGSI_OPCODE_IBFE:
case TGSI_OPCODE_UBFE:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 7f65b13b55f..0a77dce85c2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1026,7 +1026,10 @@ bool
NVC0LoweringPass::handleSUQ(Instruction *suq)
{
suq->op = OP_MOV;
- suq->setSrc(0, loadResLength32(NULL, suq->getSrc(0)->reg.fileIndex * 16));
+ suq->setSrc(0, loadResLength32(suq->getIndirect(0, 1),
+ suq->getSrc(0)->reg.fileIndex * 16));
+ suq->setIndirect(0, 0, NULL);
+ suq->setIndirect(0, 1, NULL);
return true;
}
@@ -1034,7 +1037,7 @@ bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
SVSemantic sv;
- Value *ptr = atom->getIndirect(0, 0), *base;
+ Value *ptr = atom->getIndirect(0, 0), *ind = atom->getIndirect(0, 1), *base;
switch (atom->src(0).getFile()) {
case FILE_MEMORY_LOCAL:
@@ -1045,7 +1048,7 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
break;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
- base = loadResInfo64(NULL, atom->getSrc(0)->reg.fileIndex * 16);
+ base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
assert(base->reg.size == 8);
if (ptr)
base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
@@ -1060,6 +1063,7 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
if (ptr)
base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr);
+ atom->setIndirect(0, 1, NULL);
atom->setIndirect(0, 0, base);
return true;
@@ -1112,6 +1116,9 @@ NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
uint8_t b = prog->driver->io.resInfoCBSlot;
off += prog->driver->io.suInfoBase;
+ if (ptr)
+ ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
+
return bld.
mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr);
}
@@ -1122,6 +1129,9 @@ NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
uint8_t b = prog->driver->io.resInfoCBSlot;
off += prog->driver->io.suInfoBase;
+ if (ptr)
+ ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
+
return bld.
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr);
}
@@ -1855,17 +1865,19 @@ NVC0LoweringPass::visit(Instruction *i)
assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
i->op = OP_VFETCH;
} else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
- Value *ptr = loadResInfo64(NULL, i->getSrc(0)->reg.fileIndex * 16);
+ Value *ind = i->getIndirect(0, 1);
+ Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16);
// XXX come up with a way not to do this for EVERY little access but
// rather to batch these up somehow. Unfortunately we've lost the
// information about the field width by the time we get here.
Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
- Value *length = loadResLength32(NULL, i->getSrc(0)->reg.fileIndex * 16);
+ Value *length = loadResLength32(ind, i->getSrc(0)->reg.fileIndex * 16);
Value *pred = new_LValue(func, FILE_PREDICATE);
if (i->src(0).isIndirect(0)) {
bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
}
+ i->setIndirect(0, 1, NULL);
i->setIndirect(0, 0, ptr);
bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
i->setPredicate(CC_NOT_P, pred);