diff options
Diffstat (limited to 'src/gallium/drivers/nvc0/nvc0_compute.c')
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_compute.c | 271 |
1 files changed, 271 insertions, 0 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_compute.c b/src/gallium/drivers/nvc0/nvc0_compute.c new file mode 100644 index 00000000000..464b72f34d4 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_compute.c @@ -0,0 +1,271 @@ +/* + * Copyright 2013 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christoph Bumiller, Samuel Pitoiset + */ + +#include "nvc0_context.h" +#include "nvc0_compute.h" + +int +nvc0_screen_compute_setup(struct nvc0_screen *screen, + struct nouveau_pushbuf *push) +{ + struct nouveau_object *chan = screen->base.channel; + struct nouveau_device *dev = screen->base.device; + uint32_t obj_class; + int ret; + int i; + + switch (dev->chipset & 0xf0) { + case 0xc0: + if (dev->chipset == 0xc8) + obj_class = NVC8_COMPUTE_CLASS; + else + obj_class = NVC0_COMPUTE_CLASS; + break; + case 0xd0: + obj_class = NVC0_COMPUTE_CLASS; + break; + default: + NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); + return -1; + } + + ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, + &screen->compute); + if (ret) { + NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); + return ret; + } + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL, + &screen->parm); + if (ret) + return ret; + + BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->compute->oclass); + + /* hardware limit */ + BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1); + PUSH_DATA (push, screen->mp_count); + BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1); + PUSH_DATA (push, 0xf); + + BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1); + PUSH_DATA (push, 0x8000); + + /* global memory setup */ + BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + PUSH_DATA (push, 0); + BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100); + for (i = 0; i <= 0xff; i++) + PUSH_DATA (push, (0xc << 28) | (i << 16) | i); + BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + PUSH_DATA (push, 1); + + /* local memory and cstack setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->tls->offset); + PUSH_DATA (push, screen->tls->offset); + BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2); + PUSH_DATAh(push, screen->tls->size); + PUSH_DATA (push, screen->tls->size); + BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1); + PUSH_DATA (push, 1 << 24); + + /* shared memory setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1); + PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); + BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1); + PUSH_DATA (push, 2 << 24); + BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1); + PUSH_DATA (push, 0); + + /* code segment setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); + + /* bind parameters buffer */ + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + PUSH_DATA (push, screen->parm->size); + PUSH_DATAh(push, screen->parm->offset); + PUSH_DATA (push, screen->parm->offset); + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + PUSH_DATA (push, (0 << 8) | 1); + + /* TODO: textures & samplers */ + + return 0; +} + +boolean +nvc0_compute_validate_program(struct nvc0_context *nvc0) +{ + struct nvc0_program *prog = nvc0->compprog; + + if (prog->mem) + return TRUE; + + if (!prog->translated) { + prog->translated = nvc0_program_translate( + prog, nvc0->screen->base.device->chipset); + if (!prog->translated) + return FALSE; + } + if (unlikely(!prog->code_size)) + return FALSE; + + if (likely(prog->code_size)) { + if (nvc0_program_upload_code(nvc0, prog)) { + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE); + return TRUE; + } + } + return FALSE; +} + +static boolean +nvc0_compute_state_validate(struct nvc0_context *nvc0) +{ + if (!nvc0_compute_validate_program(nvc0)) + return FALSE; + + /* TODO: textures, samplers, surfaces, global memory buffers */ + + nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE); + + nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp); + if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf))) + return FALSE; + if (unlikely(nvc0->state.flushed)) + nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE); + + return TRUE; + +} + +static void +nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + struct nvc0_program *cp = nvc0->compprog; + + if (cp->parm_size) { + BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + PUSH_DATA (push, align(cp->parm_size, 0x100)); + PUSH_DATAh(push, screen->parm->offset); + PUSH_DATA (push, screen->parm->offset); + BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + PUSH_DATA (push, (0 << 8) | 1); + /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ + BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4); + PUSH_DATA (push, 0); + PUSH_DATAp(push, input, cp->parm_size / 4); + + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); + } +} + +void +nvc0_launch_grid(struct pipe_context *pipe, + const uint *block_layout, const uint *grid_layout, + uint32_t label, + const void *input) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *cp = nvc0->compprog; + unsigned s, i; + int ret; + + ret = !nvc0_compute_state_validate(nvc0); + if (ret) + goto out; + + nvc0_compute_upload_input(nvc0, input); + + BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1); + PUSH_DATA (push, nvc0_program_symbol_offset(cp, label)); + + BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3); + PUSH_DATA (push, align(cp->cp.lmem_size, 0x10)); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ + + BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3); + PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); + PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]); + PUSH_DATA (push, cp->num_barriers); + BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); + PUSH_DATA (push, cp->num_gprs); + + /* grid/block setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); + PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]); + PUSH_DATA (push, grid_layout[2]); + BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); + PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]); + PUSH_DATA (push, block_layout[2]); + + /* launch preliminary setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); + PUSH_DATA (push, 0x1); + BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); + + /* kernel launching */ + BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0x1000); + BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); + PUSH_DATA (push, 0x1); + + /* rebind all the 3D constant buffers + * (looks like binding a CB on COMPUTE clobbers 3D state) */ + nvc0->dirty |= NVC0_NEW_CONSTBUF; + for (s = 0; s < 6; s++) { + for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++) + if (nvc0->constbuf[s][i].u.buf) + nvc0->constbuf_dirty[s] |= 1 << i; + } + memset(nvc0->state.uniform_buffer_bound, 0, + sizeof(nvc0->state.uniform_buffer_bound)); + +out: + if (ret) + NOUVEAU_ERR("Failed to launch grid !\n"); +} |