summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp148
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp57
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h12
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c10
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.h6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_tex.c100
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c5
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_compute.c5
11 files changed, 272 insertions, 75 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 520d1d6743e..3d0782f86b5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -180,6 +180,7 @@ struct nv50_ir_prog_info
uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t fbtexBindBase; /* base address for fbtex handle (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */
+ uint16_t bindlessBase; /* base address for bindless image info (nve4) */
uint16_t bufInfoBase; /* base address for buffer info */
uint16_t sampleInfoBase; /* base address for sample positions */
uint8_t msInfoCBSlot; /* cX[] used for multisample info */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 6c615806b83..09b5228127a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -20,6 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_util.h"
@@ -163,6 +164,12 @@ public:
return SrcRegister(fdr->Indirect);
}
+ struct tgsi_full_src_register asSrc()
+ {
+ assert(fdr);
+ return tgsi_full_src_register_from_dst(fdr);
+ }
+
int getArrayId() const
{
if (isIndirect(0))
@@ -1503,13 +1510,6 @@ void Source::scanInstructionSrc(const Instruction& insn,
if (src.isIndirect(0))
indirectTempArrays.insert(src.getArrayId());
} else
- if (src.getFile() == TGSI_FILE_BUFFER ||
- src.getFile() == TGSI_FILE_IMAGE ||
- (src.getFile() == TGSI_FILE_MEMORY &&
- memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
- info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
- 0x1 : 0x2;
- } else
if (src.getFile() == TGSI_FILE_OUTPUT) {
if (src.isIndirect(0)) {
// We don't know which one is accessed, just mark everything for
@@ -1580,6 +1580,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (isEdgeFlagPassthrough(insn))
info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
} else
+ if (dst.getFile() != TGSI_FILE_MEMORY &&
+ insn.getOpcode() == TGSI_OPCODE_STORE) {
+ info->io.globalAccess |= 0x2;
+ } else
if (dst.getFile() == TGSI_FILE_TEMPORARY) {
if (dst.isIndirect(0))
indirectTempArrays.insert(dst.getArrayId());
@@ -1592,6 +1596,29 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
}
}
+ if (insn.srcCount() && (
+ insn.getSrc(0).getFile() != TGSI_FILE_MEMORY ||
+ memoryFiles[insn.getSrc(0).getIndex(0)].mem_type ==
+ TGSI_MEMORY_TYPE_GLOBAL)) {
+ switch (insn.getOpcode()) {
+ case TGSI_OPCODE_ATOMUADD:
+ case TGSI_OPCODE_ATOMXCHG:
+ case TGSI_OPCODE_ATOMCAS:
+ case TGSI_OPCODE_ATOMAND:
+ case TGSI_OPCODE_ATOMOR:
+ case TGSI_OPCODE_ATOMXOR:
+ case TGSI_OPCODE_ATOMUMIN:
+ case TGSI_OPCODE_ATOMIMIN:
+ case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_ATOMIMAX:
+ case TGSI_OPCODE_LOAD:
+ info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+ 0x1 : 0x2;
+ break;
+ }
+ }
+
+
for (unsigned s = 0; s < insn.srcCount(); ++s)
scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
@@ -1648,6 +1675,7 @@ private:
Value *getOutputBase(int s);
DataArray *getArrayForFile(unsigned file, int idx);
Value *fetchSrc(int s, int c);
+ Value *fetchDst(int d, int c);
Value *acquireDst(int d, int c);
void storeDst(int d, int c, Value *);
@@ -1956,6 +1984,47 @@ Converter::fetchSrc(int s, int c)
return applySrcMod(res, s, c);
}
+Value *
+Converter::fetchDst(int d, int c)
+{
+ Value *res;
+ Value *ptr = NULL, *dimRel = NULL;
+
+ tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
+
+ if (dst.isIndirect(0))
+ ptr = fetchSrc(dst.getIndirect(0), 0, NULL);
+
+ if (dst.is2D()) {
+ switch (dst.getFile()) {
+ case TGSI_FILE_OUTPUT:
+ assert(0); // TODO
+ dimRel = NULL;
+ break;
+ case TGSI_FILE_INPUT:
+ assert(0); // TODO
+ dimRel = NULL;
+ break;
+ case TGSI_FILE_CONSTANT:
+ // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
+ if (dst.isIndirect(1))
+ dimRel = fetchSrc(dst.getIndirect(1), 0, 0);
+ break;
+ default:
+ break;
+ }
+ }
+
+ struct tgsi_full_src_register fsr = dst.asSrc();
+ tgsi::Instruction::SrcRegister src(&fsr);
+ res = fetchSrc(src, c, ptr);
+
+ if (dimRel)
+ res->getInsn()->setIndirect(0, 1, dimRel);
+
+ return res;
+}
+
Converter::DataArray *
Converter::getArrayForFile(unsigned file, int idx)
{
@@ -2645,7 +2714,7 @@ Converter::handleLOAD(Value *dst0[4])
ld->setIndirect(0, 1, ind);
}
break;
- case TGSI_FILE_IMAGE: {
+ default: {
getImageCoords(off, 1);
def.resize(4);
@@ -2656,22 +2725,28 @@ Converter::handleLOAD(Value *dst0[4])
def[c] = dst0[c];
}
+ bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
+ if (bindless)
+ ind = fetchSrc(0, 0);
+
TexInstruction *ld =
- mkTex(OP_SULDP, tgsi.getImageTarget(), r, 0, def, off);
+ mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off);
ld->tex.mask = tgsi.getDst(0).getMask();
ld->tex.format = tgsi.getImageFormat();
ld->cache = tgsi.getCacheMode();
+ ld->tex.bindless = bindless;
+ if (!bindless)
+ ld->tex.r = r;
if (ind)
ld->setIndirectR(ind);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
if (dst0[c] != def[c])
mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
- }
break;
- default:
- assert(!"Unsupported srcFile for LOAD");
}
+ }
+
/* Keep this around for now as reference when adding img support
getResourceCoords(off, r, 1);
@@ -2780,24 +2855,30 @@ Converter::handleSTORE()
st->setIndirect(0, 1, ind);
}
break;
- case TGSI_FILE_IMAGE: {
+ default: {
getImageCoords(off, 0);
src = off;
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
src.push_back(fetchSrc(1, c));
+ bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE;
+ if (bindless)
+ ind = fetchDst(0, 0);
+
TexInstruction *st =
- mkTex(OP_SUSTP, tgsi.getImageTarget(), r, 0, dummy, src);
+ mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src);
st->tex.mask = tgsi.getDst(0).getMask();
st->tex.format = tgsi.getImageFormat();
st->cache = tgsi.getCacheMode();
+ st->tex.bindless = bindless;
+ if (!bindless)
+ st->tex.r = r;
if (ind)
st->setIndirectR(ind);
- }
+
break;
- default:
- assert(!"Unsupported dstFile for STORE");
+ }
}
/* Keep this around for now as reference when adding img support
@@ -2896,7 +2977,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
break;
- case TGSI_FILE_IMAGE: {
+ default: {
getImageCoords(srcv, 1);
defv.push_back(dst);
srcv.push_back(fetchSrc(2, 0));
@@ -2904,22 +2985,27 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
srcv.push_back(fetchSrc(3, 0));
+ bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
+ if (bindless)
+ ind = fetchSrc(0, 0);
+
TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(),
- r, 0, defv, srcv);
+ 0, 0, defv, srcv);
tex->subOp = subOp;
tex->tex.mask = 1;
tex->tex.format = tgsi.getImageFormat();
tex->setType(ty);
+ tex->tex.bindless = bindless;
+ if (!bindless)
+ tex->tex.r = r;
if (ind)
tex->setIndirectR(ind);
for (int c = 0; c < 4; ++c)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
- }
break;
- default:
- assert(!"Unsupported srcFile for ATOM");
+ }
}
/* Keep this around for now as reference when adding img support
@@ -3122,7 +3208,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
- if (tgsi.dstCount()) {
+ if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
for (c = 0; c < 4; ++c) {
rDst0[c] = acquireDst(0, c);
dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
@@ -3747,8 +3833,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
if (ind)
geni->setIndirect(0, 1, ind);
} else {
- assert(tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE);
-
TexInstruction *texi = new_TexInstruction(func, OP_SUQ);
for (int c = 0, d = 0; c < 4; ++c) {
if (dst0[c]) {
@@ -3756,12 +3840,16 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
texi->tex.mask |= 1 << c;
}
}
- texi->tex.r = tgsi.getSrc(0).getIndex(0);
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) {
+ texi->tex.r = tgsi.getSrc(0).getIndex(0);
+ if (tgsi.getSrc(0).isIndirect(0))
+ texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+ } else {
+ texi->tex.bindless = true;
+ texi->setIndirectR(fetchSrc(0, 0));
+ }
texi->tex.target = tgsi.getImageTarget();
- if (tgsi.getSrc(0).isIndirect(0))
- texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
-
bb->insertTail(texi);
}
break;
@@ -4117,7 +4205,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
break;
}
- if (tgsi.dstCount()) {
+ if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
for (c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 4e65d449ebf..e07f57e782d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1742,19 +1742,23 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4)
inline Value *
-NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off)
+NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless)
{
uint32_t base = slot * NVC0_SU_INFO__STRIDE;
if (ptr) {
ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot));
- ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7));
+ if (bindless)
+ ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(511));
+ else
+ ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7));
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(6));
base = 0;
}
off += base;
- return loadResInfo32(ptr, off, prog->driver->io.suInfoBase);
+ return loadResInfo32(ptr, off, bindless ? prog->driver->io.bindlessBase :
+ prog->driver->io.suInfoBase);
}
static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
@@ -1800,7 +1804,7 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq)
} else {
offset = NVC0_SU_INFO_SIZE(c);
}
- bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset));
+ bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset, suq->tex.bindless));
if (c == 2 && suq->tex.target.isCube())
bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(d - 1), suq->getDef(d - 1),
bld.loadImm(NULL, 6));
@@ -1808,8 +1812,8 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq)
if (mask & 1) {
if (suq->tex.target.isMS()) {
- Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0));
- Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1));
+ Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless);
+ Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless);
Value *ms = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), ms_x, ms_y);
bld.mkOp2(OP_SHL, TYPE_U32, suq->getDef(d++), bld.loadImm(NULL, 1), ms);
} else {
@@ -1842,8 +1846,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
Value *ind = tex->getIndirectR();
- Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0));
- Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1));
+ Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
+ Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
@@ -1903,9 +1907,9 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
src[c] = bld.getScratch();
if (c == 0 && raw)
- v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X);
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X, su->tex.bindless);
else
- v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc));
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc), su->tex.bindless);
bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero)
->subOp = getSuClampSubOp(su, dimc);
}
@@ -1927,16 +1931,16 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff));
} else
if (dim == 3) {
- v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C);
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1])
->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
- v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH);
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless);
bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0])
->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l
} else {
assert(dim == 2);
- v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH);
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless);
bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0])
->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ?
NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
@@ -1947,7 +1951,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
if (raw) {
bf = src[0];
} else {
- v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT);
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless);
bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero)
->subOp = NV50_IR_SUBOP_V1(7,6,8|2);
}
@@ -1964,7 +1968,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
case 2:
z = off;
if (!su->tex.target.isArray() && !su->tex.target.isCube()) {
- z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C);
+ z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
subOp = NV50_IR_SUBOP_SUBFM_3D;
}
break;
@@ -1979,7 +1983,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
}
// part 2
- v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR);
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless);
if (su->tex.target == TEX_TARGET_BUFFER) {
eau = v;
@@ -1988,7 +1992,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
}
// add array layer offset
if (su->tex.target.isArray() || su->tex.target.isCube()) {
- v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY);
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless);
if (dim == 1)
bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau)
->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32
@@ -2028,7 +2032,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
// let's just set it 0 for raw access and hope it works
v = raw ?
- bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT);
+ bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless);
// get rid of old coordinate sources, make space for fmt info and predicate
su->moveSources(arg, 3 - arg);
@@ -2036,12 +2040,13 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
su->setSrc(0, addr);
su->setSrc(1, v);
su->setSrc(2, pred);
+ su->setIndirectR(NULL);
// prevent read fault when the image is not actually bound
CmpInstruction *pred1 =
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
TYPE_U32, bld.mkImm(0),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+ loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
if (su->op != OP_SUSTP && su->tex.format) {
const TexInstruction::ImgFormatDesc *format = su->tex.format;
@@ -2052,7 +2057,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
assert(format->components != 0);
bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred1->getDef(0),
TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+ loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
pred1->getDef(0));
}
su->setPredicate(CC_NOT_P, pred1->getDef(0));
@@ -2247,13 +2252,13 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
// calculate pixel offset
if (su->op == OP_SULDP || su->op == OP_SUREDP) {
- v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE);
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless);
su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v));
}
// add array layer offset
if (su->tex.target.isArray() || su->tex.target.isCube()) {
- v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY);
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless);
assert(dim > 1);
su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v));
}
@@ -2262,7 +2267,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
CmpInstruction *pred =
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
TYPE_U32, bld.mkImm(0),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+ loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
if (su->op != OP_SUSTP && su->tex.format) {
const TexInstruction::ImgFormatDesc *format = su->tex.format;
int blockwidth = format->bits[0] + format->bits[1] +
@@ -2272,7 +2277,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
// make sure that the format doesn't mismatch when it's not FMT_NONE
bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+ loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
pred->getDef(0));
}
su->setPredicate(CC_NOT_P, pred->getDef(0));
@@ -2361,7 +2366,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
CmpInstruction *pred =
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
TYPE_U32, bld.mkImm(0),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR));
+ loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
if (su->op != OP_SUSTP && su->tex.format) {
const TexInstruction::ImgFormatDesc *format = su->tex.format;
int blockwidth = format->bits[0] + format->bits[1] +
@@ -2371,7 +2376,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
// make sure that the format doesn't mismatch when it's not FMT_NONE
bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE),
+ loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
pred->getDef(0));
}
su->setPredicate(CC_NOT_P, pred->getDef(0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 7fae7e24b99..37d52976657 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -130,7 +130,7 @@ private:
Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
- Value *loadSuInfo32(Value *ptr, int slot, uint32_t off);
+ Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
Value *loadBufInfo64(Value *ptr, uint32_t off);
Value *loadBufLength32(Value *ptr, uint32_t off);
Value *loadUboInfo64(Value *ptr, uint32_t off);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index c5b625ecb45..0729c88dffa 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -108,9 +108,9 @@
/* 6 user uniform buffers, at 64K each */
#define NVC0_CB_USR_INFO(s) (s << 16)
#define NVC0_CB_USR_SIZE (6 << 16)
-/* 6 driver constbuts, at 2K each */
-#define NVC0_CB_AUX_INFO(s) NVC0_CB_USR_SIZE + (s << 11)
-#define NVC0_CB_AUX_SIZE (1 << 11)
+/* 6 driver constbuts, at 64K each */
+#define NVC0_CB_AUX_INFO(s) NVC0_CB_USR_SIZE + (s << 16)
+#define NVC0_CB_AUX_SIZE (1 << 16)
/* XXX: Figure out what this UNK data is. */
#define NVC0_CB_AUX_UNK_INFO 0x000
#define NVC0_CB_AUX_UNK_SIZE (8 * 4)
@@ -146,6 +146,9 @@
/* 1 64-bits address and 1 32-bits sequence */
#define NVC0_CB_AUX_MP_INFO 0x620
#define NVC0_CB_AUX_MP_SIZE 3 * 4
+/* 512 64-byte blocks for bindless image handles */
+#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4
+#define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4)
/* 4 32-bits floats for the vertex runout, put at the end */
#define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
@@ -355,7 +358,8 @@ void nvc0_validate_textures(struct nvc0_context *);
void nvc0_validate_samplers(struct nvc0_context *);
void nve4_set_tex_handles(struct nvc0_context *);
void nvc0_validate_surfaces(struct nvc0_context *);
-void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_image_view *,
+void nve4_set_surface_info(struct nouveau_pushbuf *,
+ const struct pipe_image_view *,
struct nvc0_context *);
void nvc0_mark_image_range_valid(const struct pipe_image_view *);
bool nvc0_update_tic(struct nvc0_context *, struct nv50_tic_entry *,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 46a15d76df0..e6157f550d6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -592,6 +592,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
if (info->target >= NVISA_GK104_CHIPSET) {
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
+ info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0);
}
if (prog->type == PIPE_SHADER_COMPUTE) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index cac4bb89271..e51d5163539 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -1057,7 +1057,8 @@ nvc0_screen_create(struct nouveau_device *dev)
if (ret)
FAIL_SCREEN_INIT("Error allocating TEXT area: %d\n", ret);
- ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 7 << 16, NULL,
+ /* 6 user uniform areas, 6 driver areas, and 1 for the runout */
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 13 << 16, NULL,
&screen->uniform_bo);
if (ret)
FAIL_SCREEN_INIT("Error allocating uniform BO: %d\n", ret);
@@ -1279,8 +1280,11 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_KICK (push);
- screen->tic.entries = CALLOC(4096, sizeof(void *));
- screen->tsc.entries = screen->tic.entries + 2048;
+ screen->tic.entries = CALLOC(
+ NVC0_TIC_MAX_ENTRIES + NVC0_TSC_MAX_ENTRIES + NVE4_IMG_MAX_HANDLES,
+ sizeof(void *));
+ screen->tsc.entries = screen->tic.entries + NVC0_TIC_MAX_ENTRIES;
+ screen->img.entries = (void *)(screen->tsc.entries + NVC0_TSC_MAX_ENTRIES);
if (!nvc0_blitter_create(screen))
goto fail;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 30635c757f1..efd62a8a412 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -13,6 +13,7 @@
#define NVC0_TIC_MAX_ENTRIES 2048
#define NVC0_TSC_MAX_ENTRIES 2048
+#define NVE4_IMG_MAX_HANDLES 512
/* doesn't count driver-reserved slot */
#define NVC0_MAX_PIPE_CONSTBUFS 15
@@ -97,6 +98,11 @@ struct nvc0_screen {
} tsc;
struct {
+ struct pipe_image_view **entries;
+ int next;
+ } img;
+
+ struct {
struct nouveau_bo *bo;
uint32_t *map;
} fence;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 88cec7fcef9..9e391fe1acf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -876,20 +876,13 @@ nve4_make_texture_handle_resident(struct pipe_context *pipe,
}
}
-void
-nvc0_init_bindless_functions(struct pipe_context *pipe) {
- pipe->create_texture_handle = nve4_create_texture_handle;
- pipe->delete_texture_handle = nve4_delete_texture_handle;
- pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;
-}
-
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
static void
-nvc0_get_surface_dims(struct pipe_image_view *view, int *width, int *height,
- int *depth)
+nvc0_get_surface_dims(const struct pipe_image_view *view,
+ int *width, int *height, int *depth)
{
struct nv04_resource *res = nv04_resource(view->resource);
int level;
@@ -937,7 +930,7 @@ nvc0_mark_image_range_valid(const struct pipe_image_view *view)
void
nve4_set_surface_info(struct nouveau_pushbuf *push,
- struct pipe_image_view *view,
+ const struct pipe_image_view *view,
struct nvc0_context *nvc0)
{
struct nvc0_screen *screen = nvc0->screen;
@@ -1070,7 +1063,7 @@ nve4_set_surface_info(struct nouveau_pushbuf *push,
static inline void
nvc0_set_surface_info(struct nouveau_pushbuf *push,
- struct pipe_image_view *view, uint64_t address,
+ const struct pipe_image_view *view, uint64_t address,
int width, int height, int depth)
{
struct nv04_resource *res;
@@ -1319,6 +1312,91 @@ nvc0_validate_surfaces(struct nvc0_context *nvc0)
}
}
+static uint64_t
+nve4_create_image_handle(struct pipe_context *pipe,
+ const struct pipe_image_view *view)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+ int i = screen->img.next, s;
+
+ while (screen->img.entries[i]) {
+ i = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
+ if (i == screen->img.next)
+ return 0;
+ }
+
+ screen->img.next = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
+ screen->img.entries[i] = calloc(1, sizeof(struct pipe_image_view));
+ *screen->img.entries[i] = *view;
+
+ for (s = 0; s < 6; s++) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
+ PUSH_DATA (push, NVC0_CB_AUX_BINDLESS_INFO(i));
+ nve4_set_surface_info(push, view, nvc0);
+ }
+
+ return 0x100000000ULL | i;
+}
+
+static void
+nve4_delete_image_handle(struct pipe_context *pipe, uint64_t handle)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_screen *screen = nvc0->screen;
+ int i = handle & (NVE4_IMG_MAX_HANDLES - 1);
+
+ free(screen->img.entries[i]);
+ screen->img.entries[i] = NULL;
+}
+
+static void
+nve4_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,
+ unsigned access, bool resident)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_screen *screen = nvc0->screen;
+
+ if (resident) {
+ struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
+ struct pipe_image_view *view =
+ screen->img.entries[handle & (NVE4_IMG_MAX_HANDLES - 1)];
+ assert(view);
+
+ if (view->resource->target == PIPE_BUFFER &&
+ access & PIPE_IMAGE_ACCESS_WRITE)
+ nvc0_mark_image_range_valid(view);
+ res->handle = handle;
+ res->buf = nv04_resource(view->resource);
+ res->flags = (access & 3) << 8;
+ list_add(&res->list, &nvc0->img_head);
+ } else {
+ list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {
+ if (pos->handle == handle) {
+ list_del(&pos->list);
+ free(pos);
+ break;
+ }
+ }
+ }
+}
+
+void
+nvc0_init_bindless_functions(struct pipe_context *pipe) {
+ pipe->create_texture_handle = nve4_create_texture_handle;
+ pipe->delete_texture_handle = nve4_delete_texture_handle;
+ pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;
+
+ pipe->create_image_handle = nve4_create_image_handle;
+ pipe->delete_image_handle = nve4_delete_image_handle;
+ pipe->make_image_handle_resident = nve4_make_image_handle_resident;
+}
+
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
{
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 4cd3712203b..66de6d9e2fa 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -977,6 +977,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
resident->flags);
}
+ list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) {
+ nvc0_add_resident(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS, resident->buf,
+ resident->flags);
+ }
+
nvc0_state_validate_3d(nvc0, ~0);
if (nvc0->vertprog->vp.need_draw_parameters && !info->indirect) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index db501afb9d1..f641f4777df 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -686,6 +686,11 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
resident->flags);
}
+ list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) {
+ nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf,
+ resident->flags);
+ }
+
ret = !nve4_state_validate_cp(nvc0, ~0);
if (ret)
goto out;