diff options
author | Ilia Mirkin <[email protected]> | 2017-06-24 00:44:35 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2018-01-07 11:40:35 -0500 |
commit | 23a6e8d8ff5effa7b88bd77c3bf396879a2ece0c (patch) | |
tree | da4107ad5ff1078b4468e86fcdc3854f08787926 | |
parent | 8eb1214755366fc34ed15a7e3dec48d4f0d65f10 (diff) |
nvc0: add bindless image support for kepler
Part of each driver constbuf area is allocated for bindless image info. Any
update requires uploading to all six driver constbufs. This also extends each
driver constbuf to 64KB, up from 2KB.
Signed-off-by: Ilia Mirkin <[email protected]>
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 148 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 57 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 100 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 5 |
11 files changed, 272 insertions, 75 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 520d1d6743e..3d0782f86b5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -180,6 +180,7 @@ struct nv50_ir_prog_info uint16_t texBindBase; /* base address for tex handles (nve4) */ uint16_t fbtexBindBase; /* base address for fbtex handle (nve4) */ uint16_t suInfoBase; /* base address for surface info (nve4) */ + uint16_t bindlessBase; /* base address for bindless image info (nve4) */ uint16_t bufInfoBase; /* base address for buffer info */ uint16_t sampleInfoBase; /* base address for sample positions */ uint8_t msInfoCBSlot; /* cX[] used for multisample info */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 6c615806b83..09b5228127a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_util.h" @@ -163,6 +164,12 @@ public: return SrcRegister(fdr->Indirect); } + struct tgsi_full_src_register asSrc() + { + assert(fdr); + return tgsi_full_src_register_from_dst(fdr); + } + int getArrayId() const { if (isIndirect(0)) @@ -1503,13 +1510,6 @@ void Source::scanInstructionSrc(const Instruction& insn, if (src.isIndirect(0)) indirectTempArrays.insert(src.getArrayId()); } else - if (src.getFile() == TGSI_FILE_BUFFER || - src.getFile() == TGSI_FILE_IMAGE || - (src.getFile() == TGSI_FILE_MEMORY && - memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { - info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 
- 0x1 : 0x2; - } else if (src.getFile() == TGSI_FILE_OUTPUT) { if (src.isIndirect(0)) { // We don't know which one is accessed, just mark everything for @@ -1580,6 +1580,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (isEdgeFlagPassthrough(insn)) info->io.edgeFlagIn = insn.getSrc(0).getIndex(0); } else + if (dst.getFile() != TGSI_FILE_MEMORY && + insn.getOpcode() == TGSI_OPCODE_STORE) { + info->io.globalAccess |= 0x2; + } else if (dst.getFile() == TGSI_FILE_TEMPORARY) { if (dst.isIndirect(0)) indirectTempArrays.insert(dst.getArrayId()); @@ -1592,6 +1596,29 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) } } + if (insn.srcCount() && ( + insn.getSrc(0).getFile() != TGSI_FILE_MEMORY || + memoryFiles[insn.getSrc(0).getIndex(0)].mem_type == + TGSI_MEMORY_TYPE_GLOBAL)) { + switch (insn.getOpcode()) { + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_LOAD: + info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 
+ 0x1 : 0x2; + break; + } + } + + for (unsigned s = 0; s < insn.srcCount(); ++s) scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s)); @@ -1648,6 +1675,7 @@ private: Value *getOutputBase(int s); DataArray *getArrayForFile(unsigned file, int idx); Value *fetchSrc(int s, int c); + Value *fetchDst(int d, int c); Value *acquireDst(int d, int c); void storeDst(int d, int c, Value *); @@ -1956,6 +1984,47 @@ Converter::fetchSrc(int s, int c) return applySrcMod(res, s, c); } +Value * +Converter::fetchDst(int d, int c) +{ + Value *res; + Value *ptr = NULL, *dimRel = NULL; + + tgsi::Instruction::DstRegister dst = tgsi.getDst(d); + + if (dst.isIndirect(0)) + ptr = fetchSrc(dst.getIndirect(0), 0, NULL); + + if (dst.is2D()) { + switch (dst.getFile()) { + case TGSI_FILE_OUTPUT: + assert(0); // TODO + dimRel = NULL; + break; + case TGSI_FILE_INPUT: + assert(0); // TODO + dimRel = NULL; + break; + case TGSI_FILE_CONSTANT: + // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k] + if (dst.isIndirect(1)) + dimRel = fetchSrc(dst.getIndirect(1), 0, 0); + break; + default: + break; + } + } + + struct tgsi_full_src_register fsr = dst.asSrc(); + tgsi::Instruction::SrcRegister src(&fsr); + res = fetchSrc(src, c, ptr); + + if (dimRel) + res->getInsn()->setIndirect(0, 1, dimRel); + + return res; +} + Converter::DataArray * Converter::getArrayForFile(unsigned file, int idx) { @@ -2645,7 +2714,7 @@ Converter::handleLOAD(Value *dst0[4]) ld->setIndirect(0, 1, ind); } break; - case TGSI_FILE_IMAGE: { + default: { getImageCoords(off, 1); def.resize(4); @@ -2656,22 +2725,28 @@ Converter::handleLOAD(Value *dst0[4]) def[c] = dst0[c]; } + bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchSrc(0, 0); + TexInstruction *ld = - mkTex(OP_SULDP, tgsi.getImageTarget(), r, 0, def, off); + mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off); ld->tex.mask = tgsi.getDst(0).getMask(); ld->tex.format = tgsi.getImageFormat(); ld->cache = tgsi.getCacheMode(); + 
ld->tex.bindless = bindless; + if (!bindless) + ld->tex.r = r; if (ind) ld->setIndirectR(ind); FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) if (dst0[c] != def[c]) mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); - } break; - default: - assert(!"Unsupported srcFile for LOAD"); } + } + /* Keep this around for now as reference when adding img support getResourceCoords(off, r, 1); @@ -2780,24 +2855,30 @@ Converter::handleSTORE() st->setIndirect(0, 1, ind); } break; - case TGSI_FILE_IMAGE: { + default: { getImageCoords(off, 0); src = off; FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) src.push_back(fetchSrc(1, c)); + bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchDst(0, 0); + TexInstruction *st = - mkTex(OP_SUSTP, tgsi.getImageTarget(), r, 0, dummy, src); + mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src); st->tex.mask = tgsi.getDst(0).getMask(); st->tex.format = tgsi.getImageFormat(); st->cache = tgsi.getCacheMode(); + st->tex.bindless = bindless; + if (!bindless) + st->tex.r = r; if (ind) st->setIndirectR(ind); - } + break; - default: - assert(!"Unsupported dstFile for STORE"); + } } /* Keep this around for now as reference when adding img support @@ -2896,7 +2977,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) if (dst0[c]) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov break; - case TGSI_FILE_IMAGE: { + default: { getImageCoords(srcv, 1); defv.push_back(dst); srcv.push_back(fetchSrc(2, 0)); @@ -2904,22 +2985,27 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) if (subOp == NV50_IR_SUBOP_ATOM_CAS) srcv.push_back(fetchSrc(3, 0)); + bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchSrc(0, 0); + TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(), - r, 0, defv, srcv); + 0, 0, defv, srcv); tex->subOp = subOp; tex->tex.mask = 1; tex->tex.format = tgsi.getImageFormat(); tex->setType(ty); + tex->tex.bindless = 
bindless; + if (!bindless) + tex->tex.r = r; if (ind) tex->setIndirectR(ind); for (int c = 0; c < 4; ++c) if (dst0[c]) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov - } break; - default: - assert(!"Unsupported srcFile for ATOM"); + } } /* Keep this around for now as reference when adding img support @@ -3122,7 +3208,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0; - if (tgsi.dstCount()) { + if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) { for (c = 0; c < 4; ++c) { rDst0[c] = acquireDst(0, c); dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c]; @@ -3747,8 +3833,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) if (ind) geni->setIndirect(0, 1, ind); } else { - assert(tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE); - TexInstruction *texi = new_TexInstruction(func, OP_SUQ); for (int c = 0, d = 0; c < 4; ++c) { if (dst0[c]) { @@ -3756,12 +3840,16 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) texi->tex.mask |= 1 << c; } } - texi->tex.r = tgsi.getSrc(0).getIndex(0); + if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) { + texi->tex.r = tgsi.getSrc(0).getIndex(0); + if (tgsi.getSrc(0).isIndirect(0)) + texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); + } else { + texi->tex.bindless = true; + texi->setIndirectR(fetchSrc(0, 0)); + } texi->tex.target = tgsi.getImageTarget(); - if (tgsi.getSrc(0).isIndirect(0)) - texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); - bb->insertTail(texi); } break; @@ -4117,7 +4205,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) break; } - if (tgsi.dstCount()) { + if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) { for (c = 0; c < 4; ++c) { if (!dst0[c]) continue; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 4e65d449ebf..e07f57e782d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1742,19 +1742,23 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) #define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4) inline Value * -NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off) +NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless) { uint32_t base = slot * NVC0_SU_INFO__STRIDE; if (ptr) { ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot)); - ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); + if (bindless) + ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(511)); + else + ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(6)); base = 0; } off += base; - return loadResInfo32(ptr, off, prog->driver->io.suInfoBase); + return loadResInfo32(ptr, off, bindless ? 
prog->driver->io.bindlessBase : + prog->driver->io.suInfoBase); } static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) @@ -1800,7 +1804,7 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq) } else { offset = NVC0_SU_INFO_SIZE(c); } - bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset)); + bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset, suq->tex.bindless)); if (c == 2 && suq->tex.target.isCube()) bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(d - 1), suq->getDef(d - 1), bld.loadImm(NULL, 6)); @@ -1808,8 +1812,8 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq) if (mask & 1) { if (suq->tex.target.isMS()) { - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0)); - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1)); + Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless); + Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless); Value *ms = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), ms_x, ms_y); bld.mkOp2(OP_SHL, TYPE_U32, suq->getDef(d++), bld.loadImm(NULL, 1), ms); } else { @@ -1842,8 +1846,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); Value *ind = tex->getIndirectR(); - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0)); - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1)); + Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless); + Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless); bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); @@ -1903,9 +1907,9 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) src[c] = bld.getScratch(); if (c == 0 && raw) - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X, su->tex.bindless); else - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc)); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc), 
su->tex.bindless); bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero) ->subOp = getSuClampSubOp(su, dimc); } @@ -1927,16 +1931,16 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); } else if (dim == 3) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l } else { assert(dim == 2); - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0]) ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ? 
NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l @@ -1947,7 +1951,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) if (raw) { bf = src[0]; } else { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless); bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero) ->subOp = NV50_IR_SUBOP_V1(7,6,8|2); } @@ -1964,7 +1968,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) case 2: z = off; if (!su->tex.target.isArray() && !su->tex.target.isCube()) { - z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C); + z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); subOp = NV50_IR_SUBOP_SUBFM_3D; } break; @@ -1979,7 +1983,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) } // part 2 - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless); if (su->tex.target == TEX_TARGET_BUFFER) { eau = v; @@ -1988,7 +1992,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) } // add array layer offset if (su->tex.target.isArray() || su->tex.target.isCube()) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless); if (dim == 1) bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32 @@ -2028,7 +2032,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) // let's just set it 0 for raw access and hope it works v = raw ? 
- bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT); + bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless); // get rid of old coordinate sources, make space for fmt info and predicate su->moveSources(arg, 3 - arg); @@ -2036,12 +2040,13 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) su->setSrc(0, addr); su->setSrc(1, v); su->setSrc(2, pred); + su->setIndirectR(NULL); // prevent read fault when the image is not actually bound CmpInstruction *pred1 = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), TYPE_U32, bld.mkImm(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); + loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); if (su->op != OP_SUSTP && su->tex.format) { const TexInstruction::ImgFormatDesc *format = su->tex.format; @@ -2052,7 +2057,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) assert(format->components != 0); bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred1->getDef(0), TYPE_U32, bld.loadImm(NULL, blockwidth / 8), - loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), + loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), pred1->getDef(0)); } su->setPredicate(CC_NOT_P, pred1->getDef(0)); @@ -2247,13 +2252,13 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) // calculate pixel offset if (su->op == OP_SULDP || su->op == OP_SUREDP) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless); su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v)); } // add array layer offset if (su->tex.target.isArray() || su->tex.target.isCube()) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless); assert(dim > 1); su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v)); } @@ -2262,7 +2267,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) CmpInstruction *pred = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, 
bld.getSSA(1, FILE_PREDICATE), TYPE_U32, bld.mkImm(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); + loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); if (su->op != OP_SUSTP && su->tex.format) { const TexInstruction::ImgFormatDesc *format = su->tex.format; int blockwidth = format->bits[0] + format->bits[1] + @@ -2272,7 +2277,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) // make sure that the format doesn't mismatch when it's not FMT_NONE bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), TYPE_U32, bld.loadImm(NULL, blockwidth / 8), - loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), + loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), pred->getDef(0)); } su->setPredicate(CC_NOT_P, pred->getDef(0)); @@ -2361,7 +2366,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) CmpInstruction *pred = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), TYPE_U32, bld.mkImm(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); + loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); if (su->op != OP_SUSTP && su->tex.format) { const TexInstruction::ImgFormatDesc *format = su->tex.format; int blockwidth = format->bits[0] + format->bits[1] + @@ -2371,7 +2376,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) // make sure that the format doesn't mismatch when it's not FMT_NONE bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), TYPE_U32, bld.loadImm(NULL, blockwidth / 8), - loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), + loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), pred->getDef(0)); } su->setPredicate(CC_NOT_P, pred->getDef(0)); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 7fae7e24b99..37d52976657 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -130,7 +130,7 @@ private: Value 
*loadResInfo32(Value *ptr, uint32_t off, uint16_t base); Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base); Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base); - Value *loadSuInfo32(Value *ptr, int slot, uint32_t off); + Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); Value *loadBufInfo64(Value *ptr, uint32_t off); Value *loadBufLength32(Value *ptr, uint32_t off); Value *loadUboInfo64(Value *ptr, uint32_t off); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index c5b625ecb45..0729c88dffa 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -108,9 +108,9 @@ /* 6 user uniform buffers, at 64K each */ #define NVC0_CB_USR_INFO(s) (s << 16) #define NVC0_CB_USR_SIZE (6 << 16) -/* 6 driver constbuts, at 2K each */ -#define NVC0_CB_AUX_INFO(s) NVC0_CB_USR_SIZE + (s << 11) -#define NVC0_CB_AUX_SIZE (1 << 11) +/* 6 driver constbuts, at 64K each */ +#define NVC0_CB_AUX_INFO(s) NVC0_CB_USR_SIZE + (s << 16) +#define NVC0_CB_AUX_SIZE (1 << 16) /* XXX: Figure out what this UNK data is. 
*/ #define NVC0_CB_AUX_UNK_INFO 0x000 #define NVC0_CB_AUX_UNK_SIZE (8 * 4) @@ -146,6 +146,9 @@ /* 1 64-bits address and 1 32-bits sequence */ #define NVC0_CB_AUX_MP_INFO 0x620 #define NVC0_CB_AUX_MP_SIZE 3 * 4 +/* 512 64-byte blocks for bindless image handles */ +#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4 +#define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4) /* 4 32-bits floats for the vertex runout, put at the end */ #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6) @@ -355,7 +358,8 @@ void nvc0_validate_textures(struct nvc0_context *); void nvc0_validate_samplers(struct nvc0_context *); void nve4_set_tex_handles(struct nvc0_context *); void nvc0_validate_surfaces(struct nvc0_context *); -void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_image_view *, +void nve4_set_surface_info(struct nouveau_pushbuf *, + const struct pipe_image_view *, struct nvc0_context *); void nvc0_mark_image_range_valid(const struct pipe_image_view *); bool nvc0_update_tic(struct nvc0_context *, struct nv50_tic_entry *, diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 46a15d76df0..e6157f550d6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -592,6 +592,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, if (info->target >= NVISA_GK104_CHIPSET) { info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0); info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO; + info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0); } if (prog->type == PIPE_SHADER_COMPUTE) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index cac4bb89271..e51d5163539 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -1057,7 +1057,8 @@ nvc0_screen_create(struct nouveau_device *dev) if (ret) 
FAIL_SCREEN_INIT("Error allocating TEXT area: %d\n", ret); - ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 7 << 16, NULL, + /* 6 user uniform areas, 6 driver areas, and 1 for the runout */ + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 13 << 16, NULL, &screen->uniform_bo); if (ret) FAIL_SCREEN_INIT("Error allocating uniform BO: %d\n", ret); @@ -1279,8 +1280,11 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_KICK (push); - screen->tic.entries = CALLOC(4096, sizeof(void *)); - screen->tsc.entries = screen->tic.entries + 2048; + screen->tic.entries = CALLOC( + NVC0_TIC_MAX_ENTRIES + NVC0_TSC_MAX_ENTRIES + NVE4_IMG_MAX_HANDLES, + sizeof(void *)); + screen->tsc.entries = screen->tic.entries + NVC0_TIC_MAX_ENTRIES; + screen->img.entries = (void *)(screen->tsc.entries + NVC0_TSC_MAX_ENTRIES); if (!nvc0_blitter_create(screen)) goto fail; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 30635c757f1..efd62a8a412 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -13,6 +13,7 @@ #define NVC0_TIC_MAX_ENTRIES 2048 #define NVC0_TSC_MAX_ENTRIES 2048 +#define NVE4_IMG_MAX_HANDLES 512 /* doesn't count driver-reserved slot */ #define NVC0_MAX_PIPE_CONSTBUFS 15 @@ -97,6 +98,11 @@ struct nvc0_screen { } tsc; struct { + struct pipe_image_view **entries; + int next; + } img; + + struct { struct nouveau_bo *bo; uint32_t *map; } fence; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 88cec7fcef9..9e391fe1acf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -876,20 +876,13 @@ nve4_make_texture_handle_resident(struct pipe_context *pipe, } } -void -nvc0_init_bindless_functions(struct pipe_context *pipe) { - pipe->create_texture_handle = nve4_create_texture_handle; - pipe->delete_texture_handle = 
nve4_delete_texture_handle; - pipe->make_texture_handle_resident = nve4_make_texture_handle_resident; -} - static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT]; static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT]; static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT]; static void -nvc0_get_surface_dims(struct pipe_image_view *view, int *width, int *height, - int *depth) +nvc0_get_surface_dims(const struct pipe_image_view *view, + int *width, int *height, int *depth) { struct nv04_resource *res = nv04_resource(view->resource); int level; @@ -937,7 +930,7 @@ nvc0_mark_image_range_valid(const struct pipe_image_view *view) void nve4_set_surface_info(struct nouveau_pushbuf *push, - struct pipe_image_view *view, + const struct pipe_image_view *view, struct nvc0_context *nvc0) { struct nvc0_screen *screen = nvc0->screen; @@ -1070,7 +1063,7 @@ nve4_set_surface_info(struct nouveau_pushbuf *push, static inline void nvc0_set_surface_info(struct nouveau_pushbuf *push, - struct pipe_image_view *view, uint64_t address, + const struct pipe_image_view *view, uint64_t address, int width, int height, int depth) { struct nv04_resource *res; @@ -1319,6 +1312,91 @@ nvc0_validate_surfaces(struct nvc0_context *nvc0) } } +static uint64_t +nve4_create_image_handle(struct pipe_context *pipe, + const struct pipe_image_view *view) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + int i = screen->img.next, s; + + while (screen->img.entries[i]) { + i = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1); + if (i == screen->img.next) + return 0; + } + + screen->img.next = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1); + screen->img.entries[i] = calloc(1, sizeof(struct pipe_image_view)); + *screen->img.entries[i] = *view; + + for (s = 0; s < 6; s++) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + 
NVC0_CB_AUX_INFO(s)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16); + PUSH_DATA (push, NVC0_CB_AUX_BINDLESS_INFO(i)); + nve4_set_surface_info(push, view, nvc0); + } + + return 0x100000000ULL | i; +} + +static void +nve4_delete_image_handle(struct pipe_context *pipe, uint64_t handle) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_screen *screen = nvc0->screen; + int i = handle & (NVE4_IMG_MAX_HANDLES - 1); + + free(screen->img.entries[i]); + screen->img.entries[i] = NULL; +} + +static void +nve4_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle, + unsigned access, bool resident) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_screen *screen = nvc0->screen; + + if (resident) { + struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident)); + struct pipe_image_view *view = + screen->img.entries[handle & (NVE4_IMG_MAX_HANDLES - 1)]; + assert(view); + + if (view->resource->target == PIPE_BUFFER && + access & PIPE_IMAGE_ACCESS_WRITE) + nvc0_mark_image_range_valid(view); + res->handle = handle; + res->buf = nv04_resource(view->resource); + res->flags = (access & 3) << 8; + list_add(&res->list, &nvc0->img_head); + } else { + list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) { + if (pos->handle == handle) { + list_del(&pos->list); + free(pos); + break; + } + } + } +} + +void +nvc0_init_bindless_functions(struct pipe_context *pipe) { + pipe->create_texture_handle = nve4_create_texture_handle; + pipe->delete_texture_handle = nve4_delete_texture_handle; + pipe->make_texture_handle_resident = nve4_make_texture_handle_resident; + + pipe->create_image_handle = nve4_create_image_handle; + pipe->delete_image_handle = nve4_delete_image_handle; + pipe->make_image_handle_resident = nve4_make_image_handle_resident; +} + static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] = { diff --git 
a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 4cd3712203b..66de6d9e2fa 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -977,6 +977,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) resident->flags); } + list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) { + nvc0_add_resident(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS, resident->buf, + resident->flags); + } + nvc0_state_validate_3d(nvc0, ~0); if (nvc0->vertprog->vp.need_draw_parameters && !info->indirect) { diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index db501afb9d1..f641f4777df 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -686,6 +686,11 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) resident->flags); } + list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) { + nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf, + resident->flags); + } + ret = !nve4_state_validate_cp(nvc0, ~0); if (ret) goto out; |