summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/nouveau/Makefile.sources1
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.c320
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h444
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c30
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h23
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c22
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.h7
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c61
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.h8
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c99
10 files changed, 1006 insertions, 9 deletions
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 83f81135590..c2ff8e9b46e 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -64,6 +64,7 @@ NV50_C_SOURCES := \
nv50/nv50_3ddefs.xml.h \
nv50/nv50_3d.xml.h \
nv50/nv50_blit.h \
+ nv50/nv50_compute.c \
nv50/nv50_context.c \
nv50/nv50_context.h \
nv50/nv50_defs.xml.h \
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
new file mode 100644
index 00000000000..6d23fd66945
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2012 Francisco Jerez
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_compute.xml.h"
+
+#include "codegen/nv50_ir_driver.h"
+
+int
+nv50_screen_compute_setup(struct nv50_screen *screen,
+ struct nouveau_pushbuf *push)
+{
+ struct nouveau_device *dev = screen->base.device;
+ struct nouveau_object *chan = screen->base.channel;
+ struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
+ unsigned obj_class;
+ int i, ret;
+
+ switch (dev->chipset & 0xf0) {
+ case 0x50:
+ case 0x80:
+ case 0x90:
+ obj_class = NV50_COMPUTE_CLASS;
+ break;
+ case 0xa0:
+ switch (dev->chipset) {
+ case 0xa3:
+ case 0xa5:
+ case 0xa8:
+ obj_class = NVA3_COMPUTE_CLASS;
+ break;
+ default:
+ obj_class = NV50_COMPUTE_CLASS;
+ break;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+ return -1;
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
+ &screen->compute);
+ if (ret)
+ return ret;
+
+ BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->compute->handle);
+
+ BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->stack_bo->offset);
+ PUSH_DATA (push, screen->stack_bo->offset);
+ BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
+ PUSH_DATA (push, 4);
+
+ BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
+ PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
+ BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
+ PUSH_DATA (push, 0x100);
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
+ PUSH_DATA (push, fifo->vram);
+
+ for (i = 0; i < 15; i++) {
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
+ PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+ }
+
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
+ PUSH_DATA (push, ~0);
+ BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
+ PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+
+ BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
+ PUSH_DATA (push, 7);
+ BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
+ PUSH_DATA (push, 7);
+ BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
+ PUSH_DATA (push, 0x54);
+ BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
+
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
+
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
+ PUSH_DATA (push, fifo->vram);
+
+ BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->tls_bo->offset + 65536);
+ PUSH_DATA (push, screen->tls_bo->offset + 65536);
+ BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
+ PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
+
+ return 0;
+}
+
+static bool
+nv50_compute_validate_program(struct nv50_context *nv50)
+{
+ struct nv50_program *prog = nv50->compprog;
+
+ if (prog->mem)
+ return true;
+
+ if (!prog->translated) {
+ prog->translated = nv50_program_translate(
+ prog, nv50->screen->base.device->chipset, &nv50->base.debug);
+ if (!prog->translated)
+ return false;
+ }
+ if (unlikely(!prog->code_size))
+ return false;
+
+ if (likely(prog->code_size)) {
+ if (nv50_program_upload_code(nv50, prog)) {
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
+ PUSH_DATA (push, 0);
+ return true;
+ }
+ }
+ return false;
+}
+
+static void
+nv50_compute_validate_globals(struct nv50_context *nv50)
+{
+ unsigned i;
+
+ for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource *res = *util_dynarray_element(
+ &nv50->global_residents, struct pipe_resource *, i);
+ if (res)
+ nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
+ nv04_resource(res), NOUVEAU_BO_RDWR);
+ }
+}
+
+static bool
+nv50_compute_state_validate(struct nv50_context *nv50)
+{
+ if (!nv50_compute_validate_program(nv50))
+ return false;
+
+ if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS)
+ nv50_compute_validate_globals(nv50);
+
+ /* TODO: validate textures, samplers, surfaces */
+
+ nv50_bufctx_fence(nv50->bufctx_cp, false);
+
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp);
+ if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf)))
+ return false;
+ if (unlikely(nv50->state.flushed))
+ nv50_bufctx_fence(nv50->bufctx_cp, true);
+
+ return true;
+}
+
+static void
+nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
+{
+ struct nv50_screen *screen = nv50->screen;
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+ unsigned size = align(nv50->compprog->parm_size, 0x4);
+
+ BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+ PUSH_DATA (push, (size / 4) << 8);
+
+ if (size) {
+ struct nouveau_mm_allocation *mm;
+ struct nouveau_bo *bo = NULL;
+ unsigned offset;
+
+ mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
+ assert(mm);
+
+ nouveau_bo_map(bo, 0, screen->base.client);
+ memcpy(bo->map + offset, input, size);
+
+ nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ nouveau_pushbuf_bufctx(push, nv50->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4);
+ nouveau_pushbuf_data(push, bo, offset, size);
+
+ nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
+ nouveau_bo_ref(NULL, &bo);
+ nouveau_bufctx_reset(nv50->bufctx, 0);
+ }
+}
+
+static uint32_t
+nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
+{
+ struct nv50_program *prog = nv50->compprog;
+ const struct nv50_ir_prog_symbol *syms =
+ (const struct nv50_ir_prog_symbol *)prog->cp.syms;
+ unsigned i;
+
+ for (i = 0; i < prog->cp.num_syms; ++i) {
+ if (syms[i].label == label)
+ return prog->code_base + syms[i].offset;
+ }
+ return prog->code_base; /* no symbols or symbol not found */
+}
+
+void
+nv50_launch_grid(struct pipe_context *pipe,
+ const uint *block_layout, const uint *grid_layout,
+ uint32_t label, const void *input)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2];
+ struct nv50_program *cp = nv50->compprog;
+ bool ret;
+
+ ret = !nv50_compute_state_validate(nv50);
+ if (ret) {
+ NOUVEAU_ERR("Failed to launch grid !\n");
+ return;
+ }
+
+ nv50_compute_upload_input(nv50, input);
+
+ BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
+ PUSH_DATA (push, nv50_compute_find_symbol(nv50, label));
+
+ BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
+ PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
+ BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
+ PUSH_DATA (push, cp->max_gpr);
+
+ /* grid/block setup */
+ BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
+ PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]);
+ PUSH_DATA (push, block_layout[2]);
+ BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
+ PUSH_DATA (push, 1 << 16 | block_size);
+ BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
+ PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]);
+ BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
+ PUSH_DATA (push, 1);
+
+ /* kernel launching */
+ BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+
+ /* bind a compute shader clobbers fragment shader state */
+ nv50->dirty |= NV50_NEW_FRAGPROG;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h
new file mode 100644
index 00000000000..268d11253b6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h
@@ -0,0 +1,444 @@
+#ifndef NV50_COMPUTE_XML
+#define NV50_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/graph/g80_compute.xml ( 14027 bytes, from 2015-02-14 02:01:36)
+- rnndb/copyright.xml ( 6456 bytes, from 2015-02-14 02:01:36)
+- rnndb/nvchipsets.xml ( 2833 bytes, from 2015-04-28 16:28:33)
+- rnndb/fifo/nv_object.xml ( 15390 bytes, from 2015-04-22 20:36:09)
+- rnndb/g80_defs.xml ( 18210 bytes, from 2015-10-19 20:49:59)
+
+Copyright (C) 2006-2015 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- Ilia Mirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Koƛcielnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NV50_COMPUTE_DMA_NOTIFY 0x00000180
+
+#define NV50_COMPUTE_DMA_GLOBAL 0x000001a0
+
+#define NV50_COMPUTE_DMA_QUERY 0x000001a4
+
+#define NV50_COMPUTE_DMA_LOCAL 0x000001b8
+
+#define NV50_COMPUTE_DMA_STACK 0x000001bc
+
+#define NV50_COMPUTE_DMA_CODE_CB 0x000001c0
+
+#define NV50_COMPUTE_DMA_TSC 0x000001c4
+
+#define NV50_COMPUTE_DMA_TIC 0x000001c8
+
+#define NV50_COMPUTE_DMA_TEXTURE 0x000001cc
+
+#define NV50_COMPUTE_UNK0200 0x00000200
+#define NV50_COMPUTE_UNK0200_UNK1__MASK 0x0000ffff
+#define NV50_COMPUTE_UNK0200_UNK1__SHIFT 0
+#define NV50_COMPUTE_UNK0200_UNK2__MASK 0x00ff0000
+#define NV50_COMPUTE_UNK0200_UNK2__SHIFT 16
+
+#define NV50_COMPUTE_UNK0204 0x00000204
+
+#define NV50_COMPUTE_UNK0208 0x00000208
+
+#define NV50_COMPUTE_UNK020C 0x0000020c
+
+#define NV50_COMPUTE_CP_ADDRESS_HIGH 0x00000210
+
+#define NV50_COMPUTE_CP_ADDRESS_LOW 0x00000214
+
+#define NV50_COMPUTE_STACK_ADDRESS_HIGH 0x00000218
+
+#define NV50_COMPUTE_STACK_ADDRESS_LOW 0x0000021c
+
+#define NV50_COMPUTE_STACK_SIZE_LOG 0x00000220
+
+#define NV50_COMPUTE_CALL_LIMIT_LOG 0x00000224
+
+#define NV50_COMPUTE_UNK0228 0x00000228
+#define NV50_COMPUTE_UNK0228_UNK0 0x00000001
+#define NV50_COMPUTE_UNK0228_UNK4__MASK 0x00000ff0
+#define NV50_COMPUTE_UNK0228_UNK4__SHIFT 4
+#define NV50_COMPUTE_UNK0228_UNK12__MASK 0x000ff000
+#define NV50_COMPUTE_UNK0228_UNK12__SHIFT 12
+
+#define NV50_COMPUTE_TSC_ADDRESS_HIGH 0x0000022c
+
+#define NV50_COMPUTE_TSC_ADDRESS_LOW 0x00000230
+#define NV50_COMPUTE_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NV50_COMPUTE_TSC_LIMIT 0x00000234
+#define NV50_COMPUTE_TSC_LIMIT__MAX 0x00001fff
+
+#define NV50_COMPUTE_CB_ADDR 0x00000238
+#define NV50_COMPUTE_CB_ADDR_ID__MASK 0x003fff00
+#define NV50_COMPUTE_CB_ADDR_ID__SHIFT 8
+#define NV50_COMPUTE_CB_ADDR_BUFFER__MASK 0x0000007f
+#define NV50_COMPUTE_CB_ADDR_BUFFER__SHIFT 0
+
+#define NV50_COMPUTE_CB_DATA(i0) (0x0000023c + 0x4*(i0))
+#define NV50_COMPUTE_CB_DATA__ESIZE 0x00000004
+#define NV50_COMPUTE_CB_DATA__LEN 0x00000010
+
+#define NV50_COMPUTE_TSC_FLUSH 0x0000027c
+#define NV50_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
+#define NV50_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_COMPUTE_TIC_FLUSH 0x00000280
+#define NV50_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
+#define NV50_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_COMPUTE_DELAY1 0x00000284
+
+#define NV50_COMPUTE_WATCHDOG_TIMER 0x00000288
+
+#define NV50_COMPUTE_DELAY2 0x0000028c
+
+#define NV50_COMPUTE_UNK0290 0x00000290
+
+#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH 0x00000294
+
+#define NV50_COMPUTE_LOCAL_ADDRESS_LOW 0x00000298
+#define NV50_COMPUTE_LOCAL_ADDRESS_LOW__ALIGN 0x00000100
+
+#define NV50_COMPUTE_LOCAL_SIZE_LOG 0x0000029c
+
+#define NV50_COMPUTE_UNK02A0 0x000002a0
+
+#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH 0x000002a4
+
+#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW 0x000002a8
+
+#define NV50_COMPUTE_CB_DEF_SET 0x000002ac
+#define NV50_COMPUTE_CB_DEF_SET_SIZE__MASK 0x0000ffff
+#define NV50_COMPUTE_CB_DEF_SET_SIZE__SHIFT 0
+#define NV50_COMPUTE_CB_DEF_SET_BUFFER__MASK 0x007f0000
+#define NV50_COMPUTE_CB_DEF_SET_BUFFER__SHIFT 16
+
+#define NV50_COMPUTE_UNK02B0 0x000002b0
+
+#define NV50_COMPUTE_BLOCK_ALLOC 0x000002b4
+#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__MASK 0x0000ffff
+#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__SHIFT 0
+#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__MASK 0x00ff0000
+#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__SHIFT 16
+
+#define NV50_COMPUTE_LANES32_ENABLE 0x000002b8
+
+#define NV50_COMPUTE_UNK02BC 0x000002bc
+#define NV50_COMPUTE_UNK02BC_UNK1__MASK 0x00000007
+#define NV50_COMPUTE_UNK02BC_UNK1__SHIFT 0
+#define NV50_COMPUTE_UNK02BC_UNK2__MASK 0x00000070
+#define NV50_COMPUTE_UNK02BC_UNK2__SHIFT 4
+
+#define NV50_COMPUTE_CP_REG_ALLOC_TEMP 0x000002c0
+
+#define NV50_COMPUTE_TIC_ADDRESS_HIGH 0x000002c4
+
+#define NV50_COMPUTE_TIC_ADDRESS_LOW 0x000002c8
+
+#define NV50_COMPUTE_TIC_LIMIT 0x000002cc
+
+#define NV50_COMPUTE_MP_PM_SET(i0) (0x000002d0 + 0x4*(i0))
+#define NV50_COMPUTE_MP_PM_SET__ESIZE 0x00000004
+#define NV50_COMPUTE_MP_PM_SET__LEN 0x00000004
+
+#define NV50_COMPUTE_MP_PM_CONTROL(i0) (0x000002e0 + 0x4*(i0))
+#define NV50_COMPUTE_MP_PM_CONTROL__ESIZE 0x00000004
+#define NV50_COMPUTE_MP_PM_CONTROL__LEN 0x00000004
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE__MASK 0x00000001
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE__SHIFT 0
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP 0x00000000
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP_PULSE 0x00000001
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__MASK 0x00000070
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__SHIFT 4
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK0 0x00000000
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK1 0x00000010
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK2 0x00000020
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK3 0x00000030
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK4 0x00000040
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK5 0x00000050
+#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__MASK 0x00ffff00
+#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__SHIFT 8
+#define NV50_COMPUTE_MP_PM_CONTROL_SIG__MASK 0xff000000
+#define NV50_COMPUTE_MP_PM_CONTROL_SIG__SHIFT 24
+
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE 0x000002f0
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_0 0x00000001
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_1 0x00000002
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_2 0x00000004
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_3 0x00000008
+
+#define NV50_COMPUTE_UNK02F4 0x000002f4
+
+#define NV50_COMPUTE_BLOCKDIM_LATCH 0x000002f8
+
+#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC 0x000002fc
+
+#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP 0x00000300
+
+#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC 0x00000304
+
+#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP 0x00000308
+
+#define NV50_COMPUTE_UNK030C 0x0000030c
+
+#define NV50_COMPUTE_QUERY_ADDRESS_HIGH 0x00000310
+
+#define NV50_COMPUTE_QUERY_ADDRESS_LOW 0x00000314
+
+#define NV50_COMPUTE_QUERY_SEQUENCE 0x00000318
+
+#define NV50_COMPUTE_QUERY_GET 0x0000031c
+#define NV50_COMPUTE_QUERY_GET_INTR 0x00000200
+#define NV50_COMPUTE_QUERY_GET_SHORT 0x00008000
+
+#define NV50_COMPUTE_COND_ADDRESS_HIGH 0x00000320
+
+#define NV50_COMPUTE_COND_ADDRESS_LOW 0x00000324
+
+#define NV50_COMPUTE_COND_MODE 0x00000328
+#define NV50_COMPUTE_COND_MODE_NEVER 0x00000000
+#define NV50_COMPUTE_COND_MODE_ALWAYS 0x00000001
+#define NV50_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
+#define NV50_COMPUTE_COND_MODE_EQUAL 0x00000003
+#define NV50_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NV50_COMPUTE_UNK032C 0x0000032c
+
+#define NV50_COMPUTE_UNK0330 0x00000330
+
+#define NV50_COMPUTE_UNK0334(i0) (0x00000334 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0334__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0334__LEN 0x00000003
+
+#define NV50_COMPUTE_UNK0340(i0) (0x00000340 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0340__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0340__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0348(i0) (0x00000348 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0348__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0348__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0350(i0) (0x00000350 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0350__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0350__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0358 0x00000358
+
+#define NV50_COMPUTE_UNK035C 0x0000035c
+
+#define NV50_COMPUTE_UNK0360 0x00000360
+#define NV50_COMPUTE_UNK0360_UNK0__MASK 0x000000f0
+#define NV50_COMPUTE_UNK0360_UNK0__SHIFT 4
+#define NV50_COMPUTE_UNK0360_UNK1__MASK 0x00000f00
+#define NV50_COMPUTE_UNK0360_UNK1__SHIFT 8
+
+#define NV50_COMPUTE_UNK0364 0x00000364
+
+#define NV50_COMPUTE_LAUNCH 0x00000368
+
+#define NV50_COMPUTE_UNK036C 0x0000036c
+
+#define NV50_COMPUTE_UNK0370 0x00000370
+
+#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374
+#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__MASK 0x000000ff
+#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__SHIFT 0
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MASK 0x0000ff00
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__SHIFT 8
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MAX 0x00000040
+
+#define NV50_COMPUTE_LINKED_TSC 0x00000378
+
+#define NV50_COMPUTE_UNK037C 0x0000037c
+#define NV50_COMPUTE_UNK037C_ALWAYS_DERIV 0x00000001
+#define NV50_COMPUTE_UNK037C_UNK16 0x00010000
+
+#define NV50_COMPUTE_CODE_CB_FLUSH 0x00000380
+
+#define NV50_COMPUTE_UNK0384 0x00000384
+
+#define NV50_COMPUTE_GRIDID 0x00000388
+
+#define NV50_COMPUTE_UNK038C(i0) (0x0000038c + 0x4*(i0))
+#define NV50_COMPUTE_UNK038C__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK038C__LEN 0x00000003
+
+#define NV50_COMPUTE_WRCACHE_FLUSH 0x00000398
+
+#define NV50_COMPUTE_UNK039C(i0) (0x0000039c + 0x4*(i0))
+#define NV50_COMPUTE_UNK039C__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK039C__LEN 0x00000002
+
+#define NV50_COMPUTE_GRIDDIM 0x000003a4
+#define NV50_COMPUTE_GRIDDIM_X__MASK 0x0000ffff
+#define NV50_COMPUTE_GRIDDIM_X__SHIFT 0
+#define NV50_COMPUTE_GRIDDIM_Y__MASK 0xffff0000
+#define NV50_COMPUTE_GRIDDIM_Y__SHIFT 16
+
+#define NV50_COMPUTE_SHARED_SIZE 0x000003a8
+#define NV50_COMPUTE_SHARED_SIZE__MAX 0x00004000
+#define NV50_COMPUTE_SHARED_SIZE__ALIGN 0x00000040
+
+#define NV50_COMPUTE_BLOCKDIM_XY 0x000003ac
+#define NV50_COMPUTE_BLOCKDIM_XY_X__MASK 0x0000ffff
+#define NV50_COMPUTE_BLOCKDIM_XY_X__SHIFT 0
+#define NV50_COMPUTE_BLOCKDIM_XY_Y__MASK 0xffff0000
+#define NV50_COMPUTE_BLOCKDIM_XY_Y__SHIFT 16
+
+#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0
+#define NV50_COMPUTE_BLOCKDIM_Z__MIN 0x00000001
+#define NV50_COMPUTE_BLOCKDIM_Z__MAX 0x00000040
+
+#define NV50_COMPUTE_CP_START_ID 0x000003b4
+
+#define NV50_COMPUTE_REG_MODE 0x000003b8
+#define NV50_COMPUTE_REG_MODE_PACKED 0x00000001
+#define NV50_COMPUTE_REG_MODE_STRIPED 0x00000002
+
+#define NV50_COMPUTE_TEX_LIMITS 0x000003bc
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007
+
+#define NV50_COMPUTE_BIND_TSC 0x000003c0
+#define NV50_COMPUTE_BIND_TSC_VALID 0x00000001
+#define NV50_COMPUTE_BIND_TSC_SAMPLER__MASK 0x000000f0
+#define NV50_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4
+#define NV50_COMPUTE_BIND_TSC_TSC__MASK 0x001ff000
+#define NV50_COMPUTE_BIND_TSC_TSC__SHIFT 12
+
+#define NV50_COMPUTE_BIND_TIC 0x000003c4
+#define NV50_COMPUTE_BIND_TIC_VALID 0x00000001
+#define NV50_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NV50_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1
+#define NV50_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NV50_COMPUTE_BIND_TIC_TIC__SHIFT 9
+
+#define NV50_COMPUTE_SET_PROGRAM_CB 0x000003c8
+#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__MASK 0x00000f00
+#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__SHIFT 8
+#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000
+#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__SHIFT 12
+#define NV50_COMPUTE_SET_PROGRAM_CB_VALID 0x000000ff
+
+#define NV50_COMPUTE_UNK03CC 0x000003cc
+
+#define NV50_COMPUTE_TEX_CACHE_CTL 0x000003d0
+#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NV50_COMPUTE_UNK03D4 0x000003d4
+
+#define NV50_COMPUTE_UNK03D8 0x000003d8
+
+#define NV50_COMPUTE_UNK03DC 0x000003dc
+
+#define NV50_COMPUTE_UNK03E0 0x000003e0
+
+#define NV50_COMPUTE_UNK03E4 0x000003e4
+
+#define NVA3_COMPUTE_TEX_MISC 0x000003e8
+#define NVA3_COMPUTE_TEX_MISC_UNK1 0x00000001
+#define NVA3_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002
+
+#define NV50_COMPUTE_GLOBAL(i0) (0x00000400 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL__ESIZE 0x00000020
+#define NV50_COMPUTE_GLOBAL__LEN 0x00000010
+
+#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(i0) (0x00000400 + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(i0) (0x00000404 + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_PITCH(i0) (0x00000408 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL_PITCH__MAX 0x00800000
+#define NV50_COMPUTE_GLOBAL_PITCH__ALIGN 0x00000100
+
+#define NV50_COMPUTE_GLOBAL_LIMIT(i0) (0x0000040c + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_MODE(i0) (0x00000410 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL_MODE_LINEAR 0x00000001
+#define NV50_COMPUTE_GLOBAL_MODE_UNK1__MASK 0x000000f0
+#define NV50_COMPUTE_GLOBAL_MODE_UNK1__SHIFT 4
+#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__MASK 0x00000f00
+#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__SHIFT 8
+
+#define NV50_COMPUTE_USER_PARAM(i0) (0x00000600 + 0x4*(i0))
+#define NV50_COMPUTE_USER_PARAM__ESIZE 0x00000004
+#define NV50_COMPUTE_USER_PARAM__LEN 0x00000040
+
+#define NV50_COMPUTE_UNK0700(i0) (0x00000700 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0700__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0700__LEN 0x00000010
+
+
+#endif /* NV50_COMPUTE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 7867c2df7f3..f645a4d4e6b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -113,6 +113,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
nouveau_bufctx_del(&nv50->bufctx_3d);
nouveau_bufctx_del(&nv50->bufctx);
+ nouveau_bufctx_del(&nv50->bufctx_cp);
util_unreference_framebuffer_state(&nv50->framebuffer);
@@ -131,6 +132,14 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
if (!nv50->constbuf[s][i].user)
pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL);
}
+
+ for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource **res = util_dynarray_element(
+ &nv50->global_residents, struct pipe_resource *, i);
+ pipe_resource_reference(res, NULL);
+ }
+ util_dynarray_fini(&nv50->global_residents);
}
static void
@@ -263,10 +272,13 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
nv50->base.pushbuf = screen->base.pushbuf;
nv50->base.client = screen->base.client;
- ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT,
- &nv50->bufctx_3d);
+ ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_3D_COUNT,
+ &nv50->bufctx_3d);
if (!ret)
- ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+ ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_CP_COUNT,
+ &nv50->bufctx_cp);
if (ret)
goto out_err;
@@ -290,6 +302,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
pipe->draw_vbo = nv50_draw_vbo;
pipe->clear = nv50_clear;
+ pipe->launch_grid = nv50_launch_grid;
pipe->flush = nv50_flush;
pipe->texture_barrier = nv50_texture_barrier;
@@ -335,19 +348,30 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms);
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc);
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo);
+ if (screen->compute) {
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo);
+ }
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo);
BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
+ if (screen->compute)
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
nv50->base.scratch.bo_size = 2 << 20;
+ util_dynarray_init(&nv50->global_residents);
+
return pipe;
out_err:
if (nv50->bufctx_3d)
nouveau_bufctx_del(&nv50->bufctx_3d);
+ if (nv50->bufctx_cp)
+ nouveau_bufctx_del(&nv50->bufctx_cp);
if (nv50->bufctx)
nouveau_bufctx_del(&nv50->bufctx);
FREE(nv50->blit);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index fb74a9748a3..fbafe029948 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -49,6 +49,10 @@
#define NV50_NEW_MIN_SAMPLES (1 << 22)
#define NV50_NEW_CONTEXT (1 << 31)
+#define NV50_NEW_CP_PROGRAM (1 << 0)
+#define NV50_NEW_CP_GLOBALS (1 << 1)
+
+/* 3d bufctx (during draw_vbo, blit_3d) */
#define NV50_BIND_FB 0
#define NV50_BIND_VERTEX 1
#define NV50_BIND_VERTEX_TMP 2
@@ -58,7 +62,14 @@
#define NV50_BIND_SO 53
#define NV50_BIND_SCREEN 54
#define NV50_BIND_TLS 55
-#define NV50_BIND_COUNT 56
+#define NV50_BIND_3D_COUNT 56
+
+/* compute bufctx (during launch_grid) */
+#define NV50_BIND_CP_GLOBAL 0
+#define NV50_BIND_CP_SCREEN 1
+#define NV50_BIND_CP_COUNT 2
+
+/* bufctx for other operations */
#define NV50_BIND_2D 0
#define NV50_BIND_M2MF 0
#define NV50_BIND_FENCE 1
@@ -101,8 +112,10 @@ struct nv50_context {
struct nouveau_bufctx *bufctx_3d;
struct nouveau_bufctx *bufctx;
+ struct nouveau_bufctx *bufctx_cp;
uint32_t dirty;
+ uint32_t dirty_cp; /* dirty flags for compute state */
bool cb_dirty;
struct nv50_graph_state state;
@@ -115,6 +128,7 @@ struct nv50_context {
struct nv50_program *vertprog;
struct nv50_program *gmtyprog;
struct nv50_program *fragprog;
+ struct nv50_program *compprog;
struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[3];
@@ -163,6 +177,8 @@ struct nv50_context {
uint32_t cond_condmode; /* the calculated condition */
struct nv50_blitctx *blit;
+
+ struct util_dynarray global_residents;
};
static inline struct nv50_context *
@@ -302,4 +318,9 @@ struct pipe_video_buffer *
nv98_video_buffer_create(struct pipe_context *pipe,
const struct pipe_video_buffer *template);
+/* nv50_compute.c */
+void
+nv50_launch_grid(struct pipe_context *, const uint *, const uint *,
+ uint32_t, const void *);
+
#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 707bf7a8ae3..48057d20f4e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -259,6 +259,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
return nv50_vertprog_assign_slots(info);
case PIPE_SHADER_FRAGMENT:
return nv50_fragprog_assign_slots(info);
+ case PIPE_SHADER_COMPUTE:
+ return 0;
default:
return -1;
}
@@ -355,6 +357,9 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
prog->gp.has_layer = 0;
prog->gp.has_viewport = 0;
+ if (prog->type == PIPE_SHADER_COMPUTE)
+ info->prop.cp.inputOffset = 0x10;
+
info->driverPriv = prog;
#ifdef DEBUG
@@ -401,6 +406,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
break;
}
prog->gp.vert_count = info->prop.gp.maxVertices;
+ } else
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ prog->cp.syms = info->bin.syms;
+ prog->cp.num_syms = info->bin.numSyms;
}
if (prog->pipe.stream_output.num_outputs)
@@ -423,11 +432,13 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
struct nouveau_heap *heap;
int ret;
uint32_t size = align(prog->code_size, 0x40);
+ uint8_t prog_type;
switch (prog->type) {
case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
+ case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break;
default:
assert(!"invalid program type");
return false;
@@ -450,7 +461,14 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
return false;
}
}
- prog->code_base = prog->mem->start;
+
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ /* CP code must be uploaded in FP code segment. */
+ prog_type = 1;
+ } else {
+ prog->code_base = prog->mem->start;
+ prog_type = prog->type;
+ }
ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
if (ret < 0) {
@@ -468,7 +486,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
false /* flatshade */);
nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
- (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
+ (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
NOUVEAU_BO_VRAM, prog->code_size, prog->code);
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 7a33eb11d6d..f0016707163 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -98,6 +98,13 @@ struct nv50_program {
ubyte viewportid; /* hw value of viewport index output */
} gp;
+ struct {
+ uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
+ uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
+ void *syms;
+ unsigned num_syms;
+ } cp;
+
void *fixups; /* relocation records */
void *interps; /* interpolation records */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index f47e998ab1e..0142e86ba20 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -41,8 +41,6 @@
#define THREADS_IN_WARP 32
-#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
-
static boolean
nv50_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
@@ -183,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_COMPUTE:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -212,7 +211,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
- case PIPE_CAP_COMPUTE:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
@@ -251,6 +249,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_COMPUTE:
break;
default:
return 0;
@@ -336,6 +335,52 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
return 0.0f;
}
+static int
+nv50_screen_get_compute_param(struct pipe_screen *pscreen,
+ enum pipe_compute_cap param, void *data)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+
+#define RET(x) do { \
+ if (data) \
+ memcpy(data, x, sizeof(x)); \
+ return sizeof(x); \
+} while (0)
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ RET((uint64_t []) { 2 });
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ RET(((uint64_t []) { 65535, 65535 }));
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ RET(((uint64_t []) { 512, 512, 64 }));
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ RET((uint64_t []) { 512 });
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
+ RET((uint64_t []) { 1ULL << 32 });
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+ RET((uint64_t []) { 16 << 10 });
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+ RET((uint64_t []) { 16 << 10 });
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+ RET((uint64_t []) { 4096 });
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ RET((uint32_t []) { 32 });
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+ RET((uint64_t []) { 1ULL << 40 });
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ RET((uint32_t []) { 0 });
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ RET((uint32_t []) { screen->mp_count });
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
+ default:
+ return 0;
+ }
+
+#undef RET
+}
+
static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
@@ -377,6 +422,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
nouveau_object_del(&screen->tesla);
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->compute);
nouveau_object_del(&screen->sync);
nouveau_screen_fini(&screen->base);
@@ -742,6 +788,7 @@ nv50_screen_create(struct nouveau_device *dev)
pscreen->get_param = nv50_screen_get_param;
pscreen->get_shader_param = nv50_screen_get_shader_param;
pscreen->get_paramf = nv50_screen_get_paramf;
+ pscreen->get_compute_param = nv50_screen_get_compute_param;
nv50_screen_init_resource_functions(pscreen);
@@ -851,6 +898,8 @@ nv50_screen_create(struct nouveau_device *dev)
screen->TPs = util_bitcount(value & 0xffff);
screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
+ screen->mp_count = screen->TPs * screen->MPsInTP;
+
stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
STACK_WARPS_ALLOC * 64 * 8;
@@ -902,6 +951,12 @@ nv50_screen_create(struct nouveau_device *dev)
nv50_screen_init_hwctx(screen);
+ ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
+ if (ret) {
+ NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
+ goto fail;
+ }
+
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index ce51f0fc254..153ceea7a4f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -23,6 +23,10 @@ struct nv50_context;
#define NV50_MAX_VIEWPORTS 16
+#define NV50_MAX_GLOBALS 16
+
+#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
+
struct nv50_blitter;
struct nv50_graph_state {
@@ -66,6 +70,7 @@ struct nv50_screen {
unsigned MPsInTP;
unsigned max_tls_space;
unsigned cur_tls_space;
+ unsigned mp_count;
struct nouveau_heap *vp_code_heap;
struct nouveau_heap *gp_code_heap;
@@ -93,6 +98,7 @@ struct nv50_screen {
struct nouveau_object *sync;
struct nouveau_object *tesla;
+ struct nouveau_object *compute;
struct nouveau_object *eng2d;
struct nouveau_object *m2mf;
};
@@ -109,6 +115,8 @@ void nv50_blitter_destroy(struct nv50_screen *);
int nv50_screen_tic_alloc(struct nv50_screen *, void *);
int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
+int nv50_screen_compute_setup(struct nv50_screen *, struct nouveau_pushbuf *);
+
static inline void
nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
{
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index d27f12ca94b..b4ea08d4d13 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -792,6 +792,35 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
nv50->dirty |= NV50_NEW_GMTYPROG;
}
+static void *
+nv50_cp_state_create(struct pipe_context *pipe,
+ const struct pipe_compute_state *cso)
+{
+ struct nv50_program *prog;
+
+ prog = CALLOC_STRUCT(nv50_program);
+ if (!prog)
+ return NULL;
+ prog->type = PIPE_SHADER_COMPUTE;
+
+ prog->cp.smem_size = cso->req_local_mem;
+ prog->cp.lmem_size = cso->req_private_mem;
+ prog->parm_size = cso->req_input_mem;
+
+ prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog);
+
+ return (void *)prog;
+}
+
+static void
+nv50_cp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->compprog = hwcso;
+ nv50->dirty_cp |= NV50_NEW_CP_PROGRAM;
+}
+
static void
nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
struct pipe_constant_buffer *cb)
@@ -1134,6 +1163,70 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
nv50->dirty |= NV50_NEW_STRMOUT;
}
+static void
+nv50_set_compute_resources(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_surface **resources)
+{
+ /* TODO: bind surfaces */
+}
+
+static inline void
+nv50_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
+{
+ struct nv04_resource *buf = nv04_resource(res);
+ if (buf) {
+ uint64_t limit = (buf->address + buf->base.width0) - 1;
+ if (limit < (1ULL << 32)) {
+ *phandle = (uint32_t)buf->address;
+ } else {
+ NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: "
+ "resource not contained within 32-bit address space !\n");
+ *phandle = 0;
+ }
+ } else {
+ *phandle = 0;
+ }
+}
+
+static void
+nv50_set_global_bindings(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_resource **resources,
+ uint32_t **handles)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct pipe_resource **ptr;
+ unsigned i;
+ const unsigned end = start + nr;
+
+ if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
+ const unsigned old_size = nv50->global_residents.size;
+ const unsigned req_size = end * sizeof(struct pipe_resource *);
+ util_dynarray_resize(&nv50->global_residents, req_size);
+ memset((uint8_t *)nv50->global_residents.data + old_size, 0,
+ req_size - old_size);
+ }
+
+ if (resources) {
+ ptr = util_dynarray_element(
+ &nv50->global_residents, struct pipe_resource *, start);
+ for (i = 0; i < nr; ++i) {
+ pipe_resource_reference(&ptr[i], resources[i]);
+ nv50_set_global_handle(handles[i], resources[i]);
+ }
+ } else {
+ ptr = util_dynarray_element(
+ &nv50->global_residents, struct pipe_resource *, start);
+ for (i = 0; i < nr; ++i)
+ pipe_resource_reference(&ptr[i], NULL);
+ }
+
+ nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL);
+
+ nv50->dirty_cp = NV50_NEW_CP_GLOBALS;
+}
+
void
nv50_init_state_functions(struct nv50_context *nv50)
{
@@ -1162,12 +1255,15 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->create_vs_state = nv50_vp_state_create;
pipe->create_fs_state = nv50_fp_state_create;
pipe->create_gs_state = nv50_gp_state_create;
+ pipe->create_compute_state = nv50_cp_state_create;
pipe->bind_vs_state = nv50_vp_state_bind;
pipe->bind_fs_state = nv50_fp_state_bind;
pipe->bind_gs_state = nv50_gp_state_bind;
+ pipe->bind_compute_state = nv50_cp_state_bind;
pipe->delete_vs_state = nv50_sp_state_delete;
pipe->delete_fs_state = nv50_sp_state_delete;
pipe->delete_gs_state = nv50_sp_state_delete;
+ pipe->delete_compute_state = nv50_sp_state_delete;
pipe->set_blend_color = nv50_set_blend_color;
pipe->set_stencil_ref = nv50_set_stencil_ref;
@@ -1191,6 +1287,9 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->stream_output_target_destroy = nv50_so_target_destroy;
pipe->set_stream_output_targets = nv50_set_stream_output_targets;
+ pipe->set_global_binding = nv50_set_global_bindings;
+ pipe->set_compute_resources = nv50_set_compute_resources;
+
nv50->sample_mask = ~0;
nv50->min_samples = 1;
}