diff options
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 148 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 57 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 100 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 5 |
11 files changed, 272 insertions, 75 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 520d1d6743e..3d0782f86b5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -180,6 +180,7 @@ struct nv50_ir_prog_info uint16_t texBindBase; /* base address for tex handles (nve4) */ uint16_t fbtexBindBase; /* base address for fbtex handle (nve4) */ uint16_t suInfoBase; /* base address for surface info (nve4) */ + uint16_t bindlessBase; /* base address for bindless image info (nve4) */ uint16_t bufInfoBase; /* base address for buffer info */ uint16_t sampleInfoBase; /* base address for sample positions */ uint8_t msInfoCBSlot; /* cX[] used for multisample info */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 6c615806b83..09b5228127a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_util.h" @@ -163,6 +164,12 @@ public: return SrcRegister(fdr->Indirect); } + struct tgsi_full_src_register asSrc() + { + assert(fdr); + return tgsi_full_src_register_from_dst(fdr); + } + int getArrayId() const { if (isIndirect(0)) @@ -1503,13 +1510,6 @@ void Source::scanInstructionSrc(const Instruction& insn, if (src.isIndirect(0)) indirectTempArrays.insert(src.getArrayId()); } else - if (src.getFile() == TGSI_FILE_BUFFER || - src.getFile() == TGSI_FILE_IMAGE || - (src.getFile() == TGSI_FILE_MEMORY && - memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { - info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? - 0x1 : 0x2; - } else if (src.getFile() == TGSI_FILE_OUTPUT) { if (src.isIndirect(0)) { // We don't know which one is accessed, just mark everything for @@ -1580,6 +1580,10 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (isEdgeFlagPassthrough(insn)) info->io.edgeFlagIn = insn.getSrc(0).getIndex(0); } else + if (dst.getFile() != TGSI_FILE_MEMORY && + insn.getOpcode() == TGSI_OPCODE_STORE) { + info->io.globalAccess |= 0x2; + } else if (dst.getFile() == TGSI_FILE_TEMPORARY) { if (dst.isIndirect(0)) indirectTempArrays.insert(dst.getArrayId()); @@ -1592,6 +1596,29 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) } } + if (insn.srcCount() && ( + insn.getSrc(0).getFile() != TGSI_FILE_MEMORY || + memoryFiles[insn.getSrc(0).getIndex(0)].mem_type == + TGSI_MEMORY_TYPE_GLOBAL)) { + switch (insn.getOpcode()) { + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_LOAD: + info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? + 0x1 : 0x2; + break; + } + } + + for (unsigned s = 0; s < insn.srcCount(); ++s) scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s)); @@ -1648,6 +1675,7 @@ private: Value *getOutputBase(int s); DataArray *getArrayForFile(unsigned file, int idx); Value *fetchSrc(int s, int c); + Value *fetchDst(int d, int c); Value *acquireDst(int d, int c); void storeDst(int d, int c, Value *); @@ -1956,6 +1984,47 @@ Converter::fetchSrc(int s, int c) return applySrcMod(res, s, c); } +Value * +Converter::fetchDst(int d, int c) +{ + Value *res; + Value *ptr = NULL, *dimRel = NULL; + + tgsi::Instruction::DstRegister dst = tgsi.getDst(d); + + if (dst.isIndirect(0)) + ptr = fetchSrc(dst.getIndirect(0), 0, NULL); + + if (dst.is2D()) { + switch (dst.getFile()) { + case TGSI_FILE_OUTPUT: + assert(0); // TODO + dimRel = NULL; + break; + case TGSI_FILE_INPUT: + assert(0); // TODO + dimRel = NULL; + break; + case TGSI_FILE_CONSTANT: + // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k] + if (dst.isIndirect(1)) + dimRel = fetchSrc(dst.getIndirect(1), 0, 0); + break; + default: + break; + } + } + + struct tgsi_full_src_register fsr = dst.asSrc(); + tgsi::Instruction::SrcRegister src(&fsr); + res = fetchSrc(src, c, ptr); + + if (dimRel) + res->getInsn()->setIndirect(0, 1, dimRel); + + return res; +} + Converter::DataArray * Converter::getArrayForFile(unsigned file, int idx) { @@ -2645,7 +2714,7 @@ Converter::handleLOAD(Value *dst0[4]) ld->setIndirect(0, 1, ind); } break; - case TGSI_FILE_IMAGE: { + default: { getImageCoords(off, 1); def.resize(4); @@ -2656,22 +2725,28 @@ Converter::handleLOAD(Value *dst0[4]) def[c] = dst0[c]; } + bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchSrc(0, 0); + TexInstruction *ld = - mkTex(OP_SULDP, tgsi.getImageTarget(), r, 0, def, off); + mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off); ld->tex.mask = tgsi.getDst(0).getMask(); ld->tex.format = tgsi.getImageFormat(); ld->cache = tgsi.getCacheMode(); + ld->tex.bindless = bindless; + if (!bindless) + ld->tex.r = r; if (ind) ld->setIndirectR(ind); FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) if (dst0[c] != def[c]) mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); - } break; - default: - assert(!"Unsupported srcFile for LOAD"); } + } + /* Keep this around for now as reference when adding img support getResourceCoords(off, r, 1); @@ -2780,24 +2855,30 @@ Converter::handleSTORE() st->setIndirect(0, 1, ind); } break; - case TGSI_FILE_IMAGE: { + default: { getImageCoords(off, 0); src = off; FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) src.push_back(fetchSrc(1, c)); + bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchDst(0, 0); + TexInstruction *st = - mkTex(OP_SUSTP, tgsi.getImageTarget(), r, 0, dummy, src); + mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src); st->tex.mask = tgsi.getDst(0).getMask(); st->tex.format = tgsi.getImageFormat(); st->cache = tgsi.getCacheMode(); + st->tex.bindless = bindless; + if (!bindless) + st->tex.r = r; if (ind) st->setIndirectR(ind); - } + break; - default: - assert(!"Unsupported dstFile for STORE"); + } } /* Keep this around for now as reference when adding img support @@ -2896,7 +2977,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) if (dst0[c]) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov break; - case TGSI_FILE_IMAGE: { + default: { getImageCoords(srcv, 1); defv.push_back(dst); srcv.push_back(fetchSrc(2, 0)); @@ -2904,22 +2985,27 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) if (subOp == NV50_IR_SUBOP_ATOM_CAS) srcv.push_back(fetchSrc(3, 0)); + bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE; + if (bindless) + ind = fetchSrc(0, 0); + TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(), - r, 0, defv, srcv); + 0, 0, defv, srcv); tex->subOp = subOp; tex->tex.mask = 1; tex->tex.format = tgsi.getImageFormat(); tex->setType(ty); + tex->tex.bindless = bindless; + if (!bindless) + tex->tex.r = r; if (ind) tex->setIndirectR(ind); for (int c = 0; c < 4; ++c) if (dst0[c]) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov - } break; - default: - assert(!"Unsupported srcFile for ATOM"); + } } /* Keep this around for now as reference when adding img support @@ -3122,7 +3208,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0; - if (tgsi.dstCount()) { + if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) { for (c = 0; c < 4; ++c) { rDst0[c] = acquireDst(0, c); dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c]; @@ -3747,8 +3833,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) if (ind) geni->setIndirect(0, 1, ind); } else { - assert(tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE); - TexInstruction *texi = new_TexInstruction(func, OP_SUQ); for (int c = 0, d = 0; c < 4; ++c) { if (dst0[c]) { @@ -3756,12 +3840,16 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) texi->tex.mask |= 1 << c; } } - texi->tex.r = tgsi.getSrc(0).getIndex(0); + if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) { + texi->tex.r = tgsi.getSrc(0).getIndex(0); + if (tgsi.getSrc(0).isIndirect(0)) + texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); + } else { + texi->tex.bindless = true; + texi->setIndirectR(fetchSrc(0, 0)); + } texi->tex.target = tgsi.getImageTarget(); - if (tgsi.getSrc(0).isIndirect(0)) - texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL)); - bb->insertTail(texi); } break; @@ -4117,7 +4205,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) break; } - if (tgsi.dstCount()) { + if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) { for (c = 0; c < 4; ++c) { if (!dst0[c]) continue; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 4e65d449ebf..e07f57e782d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1742,19 +1742,23 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) #define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4) inline Value * -NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off) +NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless) { uint32_t base = slot * NVC0_SU_INFO__STRIDE; if (ptr) { ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot)); - ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); + if (bindless) + ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(511)); + else + ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(6)); base = 0; } off += base; - return loadResInfo32(ptr, off, prog->driver->io.suInfoBase); + return loadResInfo32(ptr, off, bindless ? prog->driver->io.bindlessBase : + prog->driver->io.suInfoBase); } static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) @@ -1800,7 +1804,7 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq) } else { offset = NVC0_SU_INFO_SIZE(c); } - bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset)); + bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset, suq->tex.bindless)); if (c == 2 && suq->tex.target.isCube()) bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(d - 1), suq->getDef(d - 1), bld.loadImm(NULL, 6)); @@ -1808,8 +1812,8 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq) if (mask & 1) { if (suq->tex.target.isMS()) { - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0)); - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1)); + Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless); + Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless); Value *ms = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), ms_x, ms_y); bld.mkOp2(OP_SHL, TYPE_U32, suq->getDef(d++), bld.loadImm(NULL, 1), ms); } else { @@ -1842,8 +1846,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); Value *ind = tex->getIndirectR(); - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0)); - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1)); + Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless); + Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless); bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); @@ -1903,9 +1907,9 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) src[c] = bld.getScratch(); if (c == 0 && raw) - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X, su->tex.bindless); else - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc)); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc), su->tex.bindless); bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero) ->subOp = getSuClampSubOp(su, dimc); } @@ -1927,16 +1931,16 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); } else if (dim == 3) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l } else { assert(dim == 2); - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0]) ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ? NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l @@ -1947,7 +1951,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) if (raw) { bf = src[0]; } else { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless); bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero) ->subOp = NV50_IR_SUBOP_V1(7,6,8|2); } @@ -1964,7 +1968,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) case 2: z = off; if (!su->tex.target.isArray() && !su->tex.target.isCube()) { - z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C); + z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); subOp = NV50_IR_SUBOP_SUBFM_3D; } break; @@ -1979,7 +1983,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) } // part 2 - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless); if (su->tex.target == TEX_TARGET_BUFFER) { eau = v; @@ -1988,7 +1992,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) } // add array layer offset if (su->tex.target.isArray() || su->tex.target.isCube()) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless); if (dim == 1) bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32 @@ -2028,7 +2032,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) // let's just set it 0 for raw access and hope it works v = raw ? - bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT); + bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless); // get rid of old coordinate sources, make space for fmt info and predicate su->moveSources(arg, 3 - arg); @@ -2036,12 +2040,13 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) su->setSrc(0, addr); su->setSrc(1, v); su->setSrc(2, pred); + su->setIndirectR(NULL); // prevent read fault when the image is not actually bound CmpInstruction *pred1 = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), TYPE_U32, bld.mkImm(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); + loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); if (su->op != OP_SUSTP && su->tex.format) { const TexInstruction::ImgFormatDesc *format = su->tex.format; @@ -2052,7 +2057,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) assert(format->components != 0); bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred1->getDef(0), TYPE_U32, bld.loadImm(NULL, blockwidth / 8), - loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), + loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), pred1->getDef(0)); } su->setPredicate(CC_NOT_P, pred1->getDef(0)); @@ -2247,13 +2252,13 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) // calculate pixel offset if (su->op == OP_SULDP || su->op == OP_SUREDP) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless); su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v)); } // add array layer offset if (su->tex.target.isArray() || su->tex.target.isCube()) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless); assert(dim > 1); su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v)); } @@ -2262,7 +2267,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) CmpInstruction *pred = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), TYPE_U32, bld.mkImm(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); + loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); if (su->op != OP_SUSTP && su->tex.format) { const TexInstruction::ImgFormatDesc *format = su->tex.format; int blockwidth = format->bits[0] + format->bits[1] + @@ -2272,7 +2277,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) // make sure that the format doesn't mismatch when it's not FMT_NONE bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), TYPE_U32, bld.loadImm(NULL, blockwidth / 8), - loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), + loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), pred->getDef(0)); } su->setPredicate(CC_NOT_P, pred->getDef(0)); @@ -2361,7 +2366,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) CmpInstruction *pred = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), TYPE_U32, bld.mkImm(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); + loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); if (su->op != OP_SUSTP && su->tex.format) { const TexInstruction::ImgFormatDesc *format = su->tex.format; int blockwidth = format->bits[0] + format->bits[1] + @@ -2371,7 +2376,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) // make sure that the format doesn't mismatch when it's not FMT_NONE bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), TYPE_U32, bld.loadImm(NULL, blockwidth / 8), - loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), + loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), pred->getDef(0)); } su->setPredicate(CC_NOT_P, pred->getDef(0)); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 7fae7e24b99..37d52976657 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -130,7 +130,7 @@ private: Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base); Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base); Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base); - Value *loadSuInfo32(Value *ptr, int slot, uint32_t off); + Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); Value *loadBufInfo64(Value *ptr, uint32_t off); Value *loadBufLength32(Value *ptr, uint32_t off); Value *loadUboInfo64(Value *ptr, uint32_t off); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index c5b625ecb45..0729c88dffa 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -108,9 +108,9 @@ /* 6 user uniform buffers, at 64K each */ #define NVC0_CB_USR_INFO(s) (s << 16) #define NVC0_CB_USR_SIZE (6 << 16) -/* 6 driver constbuts, at 2K each */ -#define NVC0_CB_AUX_INFO(s) NVC0_CB_USR_SIZE + (s << 11) -#define NVC0_CB_AUX_SIZE (1 << 11) +/* 6 driver constbuts, at 64K each */ +#define NVC0_CB_AUX_INFO(s) NVC0_CB_USR_SIZE + (s << 16) +#define NVC0_CB_AUX_SIZE (1 << 16) /* XXX: Figure out what this UNK data is. */ #define NVC0_CB_AUX_UNK_INFO 0x000 #define NVC0_CB_AUX_UNK_SIZE (8 * 4) @@ -146,6 +146,9 @@ /* 1 64-bits address and 1 32-bits sequence */ #define NVC0_CB_AUX_MP_INFO 0x620 #define NVC0_CB_AUX_MP_SIZE 3 * 4 +/* 512 64-byte blocks for bindless image handles */ +#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4 +#define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4) /* 4 32-bits floats for the vertex runout, put at the end */ #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6) @@ -355,7 +358,8 @@ void nvc0_validate_textures(struct nvc0_context *); void nvc0_validate_samplers(struct nvc0_context *); void nve4_set_tex_handles(struct nvc0_context *); void nvc0_validate_surfaces(struct nvc0_context *); -void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_image_view *, +void nve4_set_surface_info(struct nouveau_pushbuf *, + const struct pipe_image_view *, struct nvc0_context *); void nvc0_mark_image_range_valid(const struct pipe_image_view *); bool nvc0_update_tic(struct nvc0_context *, struct nv50_tic_entry *, diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 46a15d76df0..e6157f550d6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -592,6 +592,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, if (info->target >= NVISA_GK104_CHIPSET) { info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0); info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO; + info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0); } if (prog->type == PIPE_SHADER_COMPUTE) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index cac4bb89271..e51d5163539 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -1057,7 +1057,8 @@ nvc0_screen_create(struct nouveau_device *dev) if (ret) FAIL_SCREEN_INIT("Error allocating TEXT area: %d\n", ret); - ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 7 << 16, NULL, + /* 6 user uniform areas, 6 driver areas, and 1 for the runout */ + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 13 << 16, NULL, &screen->uniform_bo); if (ret) FAIL_SCREEN_INIT("Error allocating uniform BO: %d\n", ret); @@ -1279,8 +1280,11 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_KICK (push); - screen->tic.entries = CALLOC(4096, sizeof(void *)); - screen->tsc.entries = screen->tic.entries + 2048; + screen->tic.entries = CALLOC( + NVC0_TIC_MAX_ENTRIES + NVC0_TSC_MAX_ENTRIES + NVE4_IMG_MAX_HANDLES, + sizeof(void *)); + screen->tsc.entries = screen->tic.entries + NVC0_TIC_MAX_ENTRIES; + screen->img.entries = (void *)(screen->tsc.entries + NVC0_TSC_MAX_ENTRIES); if (!nvc0_blitter_create(screen)) goto fail; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 30635c757f1..efd62a8a412 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -13,6 +13,7 @@ #define NVC0_TIC_MAX_ENTRIES 2048 #define NVC0_TSC_MAX_ENTRIES 2048 +#define NVE4_IMG_MAX_HANDLES 512 /* doesn't count driver-reserved slot */ #define NVC0_MAX_PIPE_CONSTBUFS 15 @@ -97,6 +98,11 @@ struct nvc0_screen { } tsc; struct { + struct pipe_image_view **entries; + int next; + } img; + + struct { struct nouveau_bo *bo; uint32_t *map; } fence; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 88cec7fcef9..9e391fe1acf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -876,20 +876,13 @@ nve4_make_texture_handle_resident(struct pipe_context *pipe, } } -void -nvc0_init_bindless_functions(struct pipe_context *pipe) { - pipe->create_texture_handle = nve4_create_texture_handle; - pipe->delete_texture_handle = nve4_delete_texture_handle; - pipe->make_texture_handle_resident = nve4_make_texture_handle_resident; -} - static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT]; static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT]; static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT]; static void -nvc0_get_surface_dims(struct pipe_image_view *view, int *width, int *height, - int *depth) +nvc0_get_surface_dims(const struct pipe_image_view *view, + int *width, int *height, int *depth) { struct nv04_resource *res = nv04_resource(view->resource); int level; @@ -937,7 +930,7 @@ nvc0_mark_image_range_valid(const struct pipe_image_view *view) void nve4_set_surface_info(struct nouveau_pushbuf *push, - struct pipe_image_view *view, + const struct pipe_image_view *view, struct nvc0_context *nvc0) { struct nvc0_screen *screen = nvc0->screen; @@ -1070,7 +1063,7 @@ nve4_set_surface_info(struct nouveau_pushbuf *push, static inline void nvc0_set_surface_info(struct nouveau_pushbuf *push, - struct pipe_image_view *view, uint64_t address, + const struct pipe_image_view *view, uint64_t address, int width, int height, int depth) { struct nv04_resource *res; @@ -1319,6 +1312,91 @@ nvc0_validate_surfaces(struct nvc0_context *nvc0) } } +static uint64_t +nve4_create_image_handle(struct pipe_context *pipe, + const struct pipe_image_view *view) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + int i = screen->img.next, s; + + while (screen->img.entries[i]) { + i = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1); + if (i == screen->img.next) + return 0; + } + + screen->img.next = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1); + screen->img.entries[i] = calloc(1, sizeof(struct pipe_image_view)); + *screen->img.entries[i] = *view; + + for (s = 0; s < 6; s++) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16); + PUSH_DATA (push, NVC0_CB_AUX_BINDLESS_INFO(i)); + nve4_set_surface_info(push, view, nvc0); + } + + return 0x100000000ULL | i; +} + +static void +nve4_delete_image_handle(struct pipe_context *pipe, uint64_t handle) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_screen *screen = nvc0->screen; + int i = handle & (NVE4_IMG_MAX_HANDLES - 1); + + free(screen->img.entries[i]); + screen->img.entries[i] = NULL; +} + +static void +nve4_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle, + unsigned access, bool resident) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_screen *screen = nvc0->screen; + + if (resident) { + struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident)); + struct pipe_image_view *view = + screen->img.entries[handle & (NVE4_IMG_MAX_HANDLES - 1)]; + assert(view); + + if (view->resource->target == PIPE_BUFFER && + access & PIPE_IMAGE_ACCESS_WRITE) + nvc0_mark_image_range_valid(view); + res->handle = handle; + res->buf = nv04_resource(view->resource); + res->flags = (access & 3) << 8; + list_add(&res->list, &nvc0->img_head); + } else { + list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) { + if (pos->handle == handle) { + list_del(&pos->list); + free(pos); + break; + } + } + } +} + +void +nvc0_init_bindless_functions(struct pipe_context *pipe) { + pipe->create_texture_handle = nve4_create_texture_handle; + pipe->delete_texture_handle = nve4_delete_texture_handle; + pipe->make_texture_handle_resident = nve4_make_texture_handle_resident; + + pipe->create_image_handle = nve4_create_image_handle; + pipe->delete_image_handle = nve4_delete_image_handle; + pipe->make_image_handle_resident = nve4_make_image_handle_resident; +} + static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] = { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 4cd3712203b..66de6d9e2fa 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -977,6 +977,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) resident->flags); } + list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) { + nvc0_add_resident(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS, resident->buf, + resident->flags); + } + nvc0_state_validate_3d(nvc0, ~0); if (nvc0->vertprog->vp.need_draw_parameters && !info->indirect) { diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index db501afb9d1..f641f4777df 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -686,6 +686,11 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) resident->flags); } + list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) { + nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf, + resident->flags); + } + ret = !nve4_state_validate_cp(nvc0, ~0); if (ret) goto out; |