diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 18 |
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 28 |
2 files changed, 36 insertions, 10 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 87ded68e5a7..176e0cf608d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2464,18 +2464,16 @@ NVC0LoweringPass::handleLDST(Instruction *i) assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP } } else if (i->src(0).getFile() == FILE_MEMORY_CONST) { + int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1; + Value *ind = i->getIndirect(0, 1); + if (targ->getChipset() >= NVISA_GK104_CHIPSET && - prog->getType() == Program::TYPE_COMPUTE) { + prog->getType() == Program::TYPE_COMPUTE && + (fileIndex >= 6 || ind)) { // The launch descriptor only allows to set up 8 CBs, but OpenGL - // requires at least 12 UBOs. To bypass this limitation, we store the - // addrs into the driver constbuf and we directly load from the global - // memory. - int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1; - Value *ind = i->getIndirect(0, 1); - - if (!ind && fileIndex == -1) - return; - + // requires at least 12 UBOs. To bypass this limitation, for constant + // buffers 7+, we store the addrs into the driver constbuf and we + // directly load from the global memory. if (ind) { // Clamp the UBO index when an indirect access is used to avoid // loading information from the wrong place in the driver cb. 
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index 28460f8cbeb..8aa8d4936f3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -552,6 +552,30 @@ nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size) } static void +nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc) +{ + // only user constant buffers 1-6 can be put in the descriptor, the rest are + // loaded through global memory + for (int i = 1; i <= 6; i++) { + if (nvc0->constbuf[5][i].user || !nvc0->constbuf[5][i].u.buf) + continue; + + struct nv04_resource *res = + nv04_resource(nvc0->constbuf[5][i].u.buf); + + uint32_t base = res->offset + nvc0->constbuf[5][i].offset; + uint32_t size = nvc0->constbuf[5][i].size; + if (gp100) + gp100_cp_launch_desc_set_cb(desc, i, res->bo, base, size); + else + nve4_cp_launch_desc_set_cb(desc, i, res->bo, base, size); + } + + // there is no need to do FLUSH(NVE4_COMPUTE_FLUSH_CB) because + // nve4_compute_upload_input() does it later +} + +static void nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, struct nve4_cp_launch_desc *desc, const struct pipe_grid_info *info) @@ -588,6 +612,8 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, } nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, NVC0_CB_AUX_INFO(5), 1 << 11); + + nve4_compute_setup_buf_cb(nvc0, false, desc); } static void @@ -626,6 +652,8 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, } gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, NVC0_CB_AUX_INFO(5), 1 << 11); + + nve4_compute_setup_buf_cb(nvc0, true, desc); } static inline void * |