summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorIlia Mirkin <[email protected]>2016-01-02 22:25:31 -0500
committerIlia Mirkin <[email protected]>2016-01-29 21:22:47 -0500
commitc3083c70823d8f4bfdabcf38f98dfebeff0a2b2b (patch)
treec6a8a683e4d27b075c68a44b9c7ccfa9f850becb /src
parentabe427ebd250377b21204dad07be6a207d30e9a0 (diff)
nv50/ir: add support for BUFFER accesses
This largely leaves the existing image logic alone. When image support is added this will have to be harmonized somehow. Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.cpp3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp98
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp50
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c3
6 files changed, 147 insertions, 11 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 6ad9dd31681..75e5fd843c2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -393,6 +393,9 @@ ImmediateValue::isInteger(const int i) const
case TYPE_S32:
case TYPE_U32:
return reg.data.s32 == i; // as if ...
+ case TYPE_S64:
+ case TYPE_U64:
+ return reg.data.s64 == i; // as if ...
case TYPE_F32:
return reg.data.f32 == static_cast<float>(i);
case TYPE_F64:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 9c4a38f291b..81cd4e9584f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -38,6 +38,7 @@ static nv50_ir::operation translateOpcode(uint opcode);
static nv50_ir::DataFile translateFile(uint file);
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);
+static nv50_ir::CacheMode translateCacheMode(uint qualifier);
class Instruction
{
@@ -213,6 +214,12 @@ public:
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
+ nv50_ir::CacheMode getCacheMode() const {
+ if (!insn->Instruction.Memory)
+ return nv50_ir::CACHE_CA;
+ return translateCacheMode(insn->Memory.Qualifier);
+ }
+
inline uint getLabel() { return insn->Label.Label; }
unsigned getSaturate() const { return insn->Instruction.Saturate; }
@@ -366,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file)
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
- //case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
+ case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
@@ -436,6 +443,15 @@ static nv50_ir::TexTarget translateTexture(uint tex)
}
}
+static nv50_ir::CacheMode translateCacheMode(uint qualifier)
+{
+ if (qualifier & TGSI_MEMORY_VOLATILE)
+ return nv50_ir::CACHE_CV;
+ if (qualifier & TGSI_MEMORY_COHERENT)
+ return nv50_ir::CACHE_CG;
+ return nv50_ir::CACHE_CA;
+}
+
nv50_ir::DataType Instruction::inferSrcType() const
{
switch (getOpcode()) {
@@ -1210,6 +1226,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_FILE_IMMEDIATE:
case TGSI_FILE_PREDICATE:
case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_BUFFER:
break;
default:
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
@@ -1255,6 +1272,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
indirectTempArrays.insert(insn.getDst(0).getArrayId());
+ } else
+ if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ info->io.globalAccess |= 0x2;
}
}
@@ -1264,13 +1284,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (src.isIndirect(0))
indirectTempArrays.insert(src.getArrayId());
} else
-/*
- if (src.getFile() == TGSI_FILE_RESOURCE) {
- if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
- info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+ if (src.getFile() == TGSI_FILE_BUFFER) {
+ info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
} else
-*/
if (src.getFile() == TGSI_FILE_OUTPUT) {
if (src.isIndirect(0)) {
// We don't know which one is accessed, just mark everything for
@@ -1752,7 +1769,7 @@ Converter::acquireDst(int d, int c)
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
- if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
+ if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
return NULL;
if (dst.isIndirect(0) ||
@@ -2222,6 +2239,25 @@ Converter::handleLOAD(Value *dst0[4])
int c;
std::vector<Value *> off, src, ldv, def;
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ for (c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+
+ Value *off = fetchSrc(1, c);
+ Symbol *sym;
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
+ off = NULL;
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
+ } else {
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ }
+
+ mkLoad(TYPE_U32, dst0[c], sym, off)->cache = tgsi.getCacheMode();
+ }
+ return;
+ }
+
getResourceCoords(off, r, 1);
if (isResourceRaw(code, r)) {
@@ -2298,6 +2334,28 @@ Converter::handleSTORE()
int c;
std::vector<Value *> off, src, dummy;
+ if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ for (c = 0; c < 4; ++c) {
+ if (!(tgsi.getDst(0).getMask() & (1 << c)))
+ continue;
+
+ Symbol *sym;
+ Value *off;
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
+ off = NULL;
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
+ tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
+ } else {
+ off = fetchSrc(0, 0);
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ }
+
+ mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c))
+ ->cache = tgsi.getCacheMode();
+ }
+ return;
+ }
+
getResourceCoords(off, r, 0);
src = off;
const int s = src.size();
@@ -2359,6 +2417,32 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
std::vector<Value *> defv;
LValue *dst = getScratch();
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ for (int c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+
+ Instruction *insn;
+ Value *off = fetchSrc(1, c);
+ Value *sym;
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
+ else
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
+ insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
+ if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
+ insn->setIndirect(0, 0, off);
+ insn->subOp = subOp;
+ if (subOp == NV50_IR_SUBOP_ATOM_CAS)
+ insn->setSrc(2, fetchSrc(3, 0));
+ }
+ for (int c = 0; c < 4; ++c)
+ if (dst0[c])
+ dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
+ return;
+ }
+
+
getResourceCoords(srcv, r, 1);
if (isResourceSpecial(r)) {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index dc1ab769b98..638cef921b6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1022,11 +1022,11 @@ NVC0LoweringPass::handleTXLQ(TexInstruction *i)
return true;
}
-
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
SVSemantic sv;
+ Value *ptr = atom->getIndirect(0, 0), *base;
switch (atom->src(0).getFile()) {
case FILE_MEMORY_LOCAL:
@@ -1037,11 +1037,16 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
break;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
+ base = loadResInfo64(NULL, atom->getSrc(0)->reg.fileIndex * 16);
+ assert(base->reg.size == 8);
+ if (ptr)
+ base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
+ assert(base->reg.size == 8);
+ atom->setIndirect(0, 0, base);
return true;
}
- Value *base =
+ base =
bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0));
- Value *ptr = atom->getIndirect(0, 0);
atom->setSrc(0, cloneShallow(func, atom->getSrc(0)));
atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
@@ -1094,6 +1099,26 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
}
inline Value *
+NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
+{
+ uint8_t b = prog->driver->io.resInfoCBSlot;
+ off += prog->driver->io.suInfoBase;
+
+ return bld.
+ mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr);
+}
+
+inline Value *
+NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
+{
+ uint8_t b = prog->driver->io.resInfoCBSlot;
+ off += prog->driver->io.suInfoBase;
+
+ return bld.
+ mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr);
+}
+
+inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
uint8_t b = prog->driver->io.msInfoCBSlot;
@@ -1786,6 +1811,7 @@ NVC0LoweringPass::visit(Instruction *i)
return handleRDSV(i);
case OP_WRSV:
return handleWRSV(i);
+ case OP_STORE:
case OP_LOAD:
if (i->src(0).getFile() == FILE_SHADER_INPUT) {
if (prog->getType() == Program::TYPE_COMPUTE) {
@@ -1820,6 +1846,24 @@ NVC0LoweringPass::visit(Instruction *i)
} else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
i->op = OP_VFETCH;
+ } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
+ Value *ptr = loadResInfo64(NULL, i->getSrc(0)->reg.fileIndex * 16);
+ // XXX come up with a way not to do this for EVERY little access but
+ // rather to batch these up somehow. Unfortunately we've lost the
+ // information about the field width by the time we get here.
+ Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
+ Value *length = loadResLength32(NULL, i->getSrc(0)->reg.fileIndex * 16);
+ Value *pred = new_LValue(func, FILE_PREDICATE);
+ if (i->src(0).isIndirect(0)) {
+ bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
+ bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
+ }
+ i->setIndirect(0, 0, ptr);
+ bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
+ i->setPredicate(CC_NOT_P, pred);
+ if (i->defExists(0)) {
+ bld.mkMov(i->getDef(0), bld.mkImm(0));
+ }
}
break;
case OP_ATOM:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index adb400a559a..874b81e82b8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -116,6 +116,8 @@ private:
void readTessCoord(LValue *dst, int c);
Value *loadResInfo32(Value *ptr, uint32_t off);
+ Value *loadResInfo64(Value *ptr, uint32_t off);
+ Value *loadResLength32(Value *ptr, uint32_t off);
Value *loadMsInfo32(Value *ptr, uint32_t off);
Value *loadTexHandle(Value *ptr, unsigned int slot);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index eb790d028f1..684998e817b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -3044,7 +3044,7 @@ Instruction::isResultEqual(const Instruction *that) const
if (that->srcExists(s))
return false;
- if (op == OP_LOAD || op == OP_VFETCH) {
+ if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) {
switch (src(0).getFile()) {
case FILE_MEMORY_CONST:
case FILE_SHADER_INPUT:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index c3b53621630..93f211bd5fc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -554,6 +554,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
}
info->io.resInfoCBSlot = 15;
info->io.sampleInfoBase = 256 + 128;
+ info->io.suInfoBase = 512;
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = 0; /* TODO */
}
@@ -635,6 +636,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
}
*/
if (info->io.globalAccess)
+ prog->hdr[0] |= 1 << 26;
+ if (info->io.globalAccess & 0x2)
prog->hdr[0] |= 1 << 16;
if (info->io.fp64)
prog->hdr[0] |= 1 << 27;