summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/nvc0/nvc0_compute.c
diff options
context:
space:
mode:
author: Samuel Pitoiset <[email protected]> 2013-07-25 10:35:35 +0200
committer: Christoph Bumiller <[email protected]> 2013-08-06 22:22:49 +0200
commit: 9dcd7888e6338f08a6999abfbc2ca1008f741bf8 (patch)
tree: c441cee524f6784fb2d2b7467cd5131f008a604d /src/gallium/drivers/nvc0/nvc0_compute.c
parent: 981b5891016580a72c2d6a457c6e8ef2dd5a9c95 (diff)
nvc0: implement compute support for nvc0
Tested on nvc0, nvc1, nvcf and nvd9.
Diffstat (limited to 'src/gallium/drivers/nvc0/nvc0_compute.c')
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_compute.c 271
1 files changed, 271 insertions, 0 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_compute.c b/src/gallium/drivers/nvc0/nvc0_compute.c
new file mode 100644
index 00000000000..464b72f34d4
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_compute.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2013 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller, Samuel Pitoiset
+ */
+
+#include "nvc0_context.h"
+#include "nvc0_compute.h"
+
+/*
+ * Create the compute object for a screen and emit its initial state.
+ *
+ * Picks the compute class from the device chipset, allocates the compute
+ * object and a (1 << 12)-byte VRAM buffer for kernel parameters
+ * (screen->parm), then pushes the one-time setup methods: hardware limits,
+ * the global memory window table, local memory / call stack, shared memory,
+ * the code segment address and the parameter constant buffer binding.
+ *
+ * screen: screen to set up; screen->tls, screen->text and screen->mp_count
+ *         are read here and must already be initialized.
+ * push:   push buffer that receives the setup methods.
+ *
+ * Returns 0 on success, -1 if the chipset is not handled by the switch
+ * below, or the non-zero value returned by nouveau_object_new() /
+ * nouveau_bo_new() on allocation failure.
+ */
+int
+nvc0_screen_compute_setup(struct nvc0_screen *screen,
+                          struct nouveau_pushbuf *push)
+{
+   struct nouveau_object *chan = screen->base.channel;
+   struct nouveau_device *dev = screen->base.device;
+   uint32_t obj_class;
+   int ret;
+   int i;
+
+   switch (dev->chipset & 0xf0) {
+   case 0xc0:
+      if (dev->chipset == 0xc8)
+         obj_class = NVC8_COMPUTE_CLASS;
+      else
+         obj_class = NVC0_COMPUTE_CLASS;
+      break;
+   case 0xd0:
+      obj_class = NVC0_COMPUTE_CLASS;
+      break;
+   default:
+      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+      return -1;
+   }
+
+   ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
+                            &screen->compute);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
+      return ret;
+   }
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL,
+                        &screen->parm);
+   if (ret) {
+      /* report this failure like the compute object failure above */
+      NOUVEAU_ERR("Failed to allocate parameter buffer: %d\n", ret);
+      return ret;
+   }
+
+   BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+   PUSH_DATA (push, screen->compute->oclass);
+
+   /* hardware limit */
+   BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
+   PUSH_DATA (push, screen->mp_count);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
+   PUSH_DATA (push, 0xf);
+
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
+   PUSH_DATA (push, 0x8000);
+
+   /* global memory setup */
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
+   for (i = 0; i <= 0xff; i++) {
+      /* 0xcu keeps the shift unsigned: 0xc << 28 does not fit in a signed
+       * int, and overflowing a signed left shift is undefined behaviour */
+      PUSH_DATA (push, (0xcu << 28) | (i << 16) | i);
+   }
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+   PUSH_DATA (push, 1);
+
+   /* local memory and cstack setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->tls->offset);
+   PUSH_DATA (push, screen->tls->offset);
+   BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
+   PUSH_DATAh(push, screen->tls->size);
+   PUSH_DATA (push, screen->tls->size);
+   BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
+   PUSH_DATA (push, 1 << 24);
+
+   /* shared memory setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
+   PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
+   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
+   PUSH_DATA (push, 2 << 24);
+   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
+   PUSH_DATA (push, 0);
+
+   /* code segment setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->text->offset);
+   PUSH_DATA (push, screen->text->offset);
+
+   /* bind parameters buffer */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+   PUSH_DATA (push, screen->parm->size);
+   PUSH_DATAh(push, screen->parm->offset);
+   PUSH_DATA (push, screen->parm->offset);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+   PUSH_DATA (push, (0 << 8) | 1);
+
+   /* TODO: textures & samplers */
+
+   return 0;
+}
+
+/*
+ * Make sure the compute program bound to the context is translated and has
+ * its code uploaded.
+ *
+ * Returns TRUE right away when prog->mem is already set (presumably the
+ * code is already resident - confirm against nvc0_program_upload_code()).
+ * Otherwise the program is translated if needed and uploaded, and a code
+ * flush is emitted on the context's push buffer.
+ *
+ * Returns FALSE if translation fails, if the translated program has a
+ * code_size of zero, or if the upload fails.
+ */
+boolean
+nvc0_compute_validate_program(struct nvc0_context *nvc0)
+{
+   struct nvc0_program *prog = nvc0->compprog;
+   struct nouveau_pushbuf *push;
+
+   if (prog->mem)
+      return TRUE;
+
+   if (!prog->translated) {
+      prog->translated = nvc0_program_translate(
+         prog, nvc0->screen->base.device->chipset);
+      if (!prog->translated)
+         return FALSE;
+   }
+   if (unlikely(!prog->code_size))
+      return FALSE;
+
+   /* code_size is known to be non-zero from here on */
+   if (!nvc0_program_upload_code(nvc0, prog))
+      return FALSE;
+
+   push = nvc0->base.pushbuf;
+   BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
+   return TRUE;
+}
+
+/*
+ * Validate everything needed before a compute launch.
+ *
+ * Validates the compute program, fences the compute buffer context, attaches
+ * that buffer context to the push buffer and validates the push buffer.
+ * If nvc0->state.flushed is set afterwards, the buffer context is fenced
+ * again with the last argument set to TRUE.
+ *
+ * Returns TRUE on success, FALSE if program or push buffer validation fails.
+ */
+static boolean
+nvc0_compute_state_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push;
+
+   if (!nvc0_compute_validate_program(nvc0))
+      return FALSE;
+
+   /* TODO: textures, samplers, surfaces, global memory buffers */
+
+   nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+
+   push = nvc0->base.pushbuf;
+   nouveau_pushbuf_bufctx(push, nvc0->bufctx_cp);
+   if (unlikely(nouveau_pushbuf_validate(push) != 0))
+      return FALSE;
+
+   if (unlikely(nvc0->state.flushed))
+      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+
+   return TRUE;
+}
+
+/*
+ * Upload the kernel input data into the screen's parameter buffer.
+ *
+ * Does nothing when the bound compute program has a parm_size of zero.
+ * Otherwise rebinds screen->parm as constant buffer with the size rounded
+ * up to a multiple of 0x100, writes parm_size / 4 words from `input`
+ * starting at position 0, and flushes the constant buffer.
+ */
+static void
+nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   struct nvc0_program *cp = nvc0->compprog;
+
+   if (!cp->parm_size)
+      return;
+
+   /* bind the parameter buffer, sized for this program */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+   PUSH_DATA (push, align(cp->parm_size, 0x100));
+   PUSH_DATAh(push, screen->parm->offset);
+   PUSH_DATA (push, screen->parm->offset);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+   PUSH_DATA (push, (0 << 8) | 1);
+
+   /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
+   BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
+   PUSH_DATA (push, 0);
+   PUSH_DATAp(push, input, cp->parm_size / 4);
+
+   BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
+}
+
+/*
+ * Launch the bound compute program on a grid of blocks.
+ *
+ * pipe:         context to launch on.
+ * block_layout: three block dimensions, indexed [0], [1], [2].
+ * grid_layout:  three grid dimensions, indexed [0], [1], [2].
+ * label:        symbol passed to nvc0_program_symbol_offset() to obtain
+ *               the start id of the kernel.
+ * input:        kernel input data, uploaded by nvc0_compute_upload_input().
+ *
+ * If state validation fails, an error is logged and nothing is launched.
+ * After the launch, all bound 3D constant buffers are marked dirty.
+ */
+void
+nvc0_launch_grid(struct pipe_context *pipe,
+                 const uint *block_layout, const uint *grid_layout,
+                 uint32_t label,
+                 const void *input)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_program *cp = nvc0->compprog;
+   unsigned stage, slot;
+
+   if (!nvc0_compute_state_validate(nvc0)) {
+      NOUVEAU_ERR("Failed to launch grid !\n");
+      return;
+   }
+
+   nvc0_compute_upload_input(nvc0, input);
+
+   BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
+   PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
+
+   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
+   PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
+
+   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
+   PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
+   /* product of the three block dimensions */
+   PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
+   PUSH_DATA (push, cp->num_barriers);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
+   PUSH_DATA (push, cp->num_gprs);
+
+   /* grid/block setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
+   PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
+   PUSH_DATA (push, grid_layout[2]);
+   BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
+   PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
+   PUSH_DATA (push, block_layout[2]);
+
+   /* launch preliminary setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
+   PUSH_DATA (push, 0x1);
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
+
+   /* kernel launching */
+   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
+   PUSH_DATA (push, 0x1000);
+   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
+   PUSH_DATA (push, 0x1);
+
+   /* rebind all the 3D constant buffers
+    * (looks like binding a CB on COMPUTE clobbers 3D state) */
+   nvc0->dirty |= NVC0_NEW_CONSTBUF;
+   for (stage = 0; stage < 6; stage++) {
+      for (slot = 0; slot < NVC0_MAX_PIPE_CONSTBUFS; slot++) {
+         if (nvc0->constbuf[stage][slot].u.buf)
+            nvc0->constbuf_dirty[stage] |= 1 << slot;
+      }
+   }
+   memset(nvc0->state.uniform_buffer_bound, 0,
+          sizeof(nvc0->state.uniform_buffer_bound));
+}