summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nvc0
diff options
context:
space:
mode:
author Samuel Pitoiset <[email protected]> 2013-07-25 10:35:35 +0200
committer Christoph Bumiller <[email protected]> 2013-08-06 22:22:49 +0200
commit 9dcd7888e6338f08a6999abfbc2ca1008f741bf8 (patch)
tree c441cee524f6784fb2d2b7467cd5131f008a604d /src/gallium/drivers/nvc0
parent 981b5891016580a72c2d6a457c6e8ef2dd5a9c95 (diff)
nvc0: implement compute support for nvc0
Tested on nvc0, nvc1, nvcf and nvd9.
Diffstat (limited to 'src/gallium/drivers/nvc0')
-rw-r--r-- src/gallium/drivers/nvc0/Makefile.sources 1
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_compute.c 271
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_compute.h 10
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_compute.xml.h 410
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_context.c 4
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_context.h 4
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_screen.c 5
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_screen.h 1
-rw-r--r-- src/gallium/drivers/nvc0/nve4_compute.c 32
9 files changed, 706 insertions, 32 deletions
diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources
index db8d12347b0..815a27addd4 100644
--- a/src/gallium/drivers/nvc0/Makefile.sources
+++ b/src/gallium/drivers/nvc0/Makefile.sources
@@ -1,4 +1,5 @@
C_SOURCES := \
+ nvc0_compute.c \
nvc0_context.c \
nvc0_formats.c \
nvc0_miptree.c \
diff --git a/src/gallium/drivers/nvc0/nvc0_compute.c b/src/gallium/drivers/nvc0/nvc0_compute.c
new file mode 100644
index 00000000000..464b72f34d4
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_compute.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2013 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller, Samuel Pitoiset
+ */
+
+#include "nvc0_context.h"
+#include "nvc0_compute.h"
+
+/**
+ * Create the compute object and the kernel parameter buffer for @screen and
+ * emit the one-time compute engine setup into @push.
+ *
+ * The object class is chosen from the chipset: 0xc8 gets NVC8_COMPUTE_CLASS,
+ * every other 0xcX/0xdX chipset gets NVC0_COMPUTE_CLASS.
+ *
+ * Returns 0 on success, -1 for an unsupported chipset, or the error code
+ * reported by nouveau_object_new() / nouveau_bo_new().
+ */
+int
+nvc0_screen_compute_setup(struct nvc0_screen *screen,
+                          struct nouveau_pushbuf *push)
+{
+   struct nouveau_object *chan = screen->base.channel;
+   struct nouveau_device *dev = screen->base.device;
+   uint32_t obj_class;
+   int ret;
+   int i;
+
+   switch (dev->chipset & 0xf0) {
+   case 0xc0:
+      if (dev->chipset == 0xc8)
+         obj_class = NVC8_COMPUTE_CLASS;
+      else
+         obj_class = NVC0_COMPUTE_CLASS;
+      break;
+   case 0xd0:
+      obj_class = NVC0_COMPUTE_CLASS;
+      break;
+   default:
+      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+      return -1;
+   }
+
+   ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
+                            &screen->compute);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
+      return ret;
+   }
+
+   /* 4 KiB VRAM buffer holding the kernel input parameters */
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL,
+                        &screen->parm);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate parameter buffer: %d\n", ret);
+      return ret;
+   }
+
+   BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+   PUSH_DATA (push, screen->compute->oclass);
+
+   /* hardware limit */
+   BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
+   PUSH_DATA (push, screen->mp_count);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
+   PUSH_DATA (push, 0xf);
+
+   /* method 0x02a0 has no known name (UNK02A0 in the register header) */
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
+   PUSH_DATA (push, 0x8000);
+
+   /* global memory setup: the 256 GLOBAL_BASE writes are bracketed by
+    * writes of 0 and 1 to the unnamed method 0x02c4 */
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
+   for (i = 0; i <= 0xff; i++) {
+      /* The named flags evaluate to 0xc0000000 as an unsigned constant;
+       * the previous (0xc << 28) overflowed a signed int. */
+      PUSH_DATA (push, NVC0_COMPUTE_GLOBAL_BASE_WRITE_OK |
+                       NVC0_COMPUTE_GLOBAL_BASE_READ_OK |
+                       (i << NVC0_COMPUTE_GLOBAL_BASE_INDEX__SHIFT) | i);
+   }
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+   PUSH_DATA (push, 1);
+
+   /* local memory and cstack setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->tls->offset);
+   PUSH_DATA (push, screen->tls->offset);
+   BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
+   PUSH_DATAh(push, screen->tls->size);
+   PUSH_DATA (push, screen->tls->size);
+   BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
+   PUSH_DATA (push, 1 << 24);
+
+   /* shared memory setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
+   PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
+   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
+   PUSH_DATA (push, 2 << 24);
+   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
+   PUSH_DATA (push, 0);
+
+   /* code segment setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->text->offset);
+   PUSH_DATA (push, screen->text->offset);
+
+   /* bind parameters buffer */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+   PUSH_DATA (push, screen->parm->size);
+   PUSH_DATAh(push, screen->parm->offset);
+   PUSH_DATA (push, screen->parm->offset);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+   PUSH_DATA (push, (0 << 8) | 1);
+
+   /* TODO: textures & samplers */
+
+   return 0;
+}
+
+/**
+ * Make sure the compute program bound to @nvc0 is translated and uploaded.
+ *
+ * Returns TRUE if the program already has code memory (prog->mem) or has
+ * just been uploaded; FALSE if translation fails, the translated program
+ * has no code, or the upload fails.
+ */
+boolean
+nvc0_compute_validate_program(struct nvc0_context *nvc0)
+{
+   struct nvc0_program *prog = nvc0->compprog;
+   struct nouveau_pushbuf *push;
+
+   if (prog->mem)
+      return TRUE;
+
+   if (!prog->translated) {
+      prog->translated = nvc0_program_translate(
+         prog, nvc0->screen->base.device->chipset);
+      if (!prog->translated)
+         return FALSE;
+   }
+   if (unlikely(!prog->code_size))
+      return FALSE;
+
+   /* code_size is known to be non-zero here, so no second check is needed */
+   if (!nvc0_program_upload_code(nvc0, prog))
+      return FALSE;
+
+   /* emit a code flush after a fresh upload */
+   push = nvc0->base.pushbuf;
+   BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
+   return TRUE;
+}
+
+/*
+ * Validate the compute program, fence the compute buffer context and
+ * validate the push buffer against it.
+ *
+ * Returns FALSE if the program or the push buffer fails to validate,
+ * TRUE otherwise.
+ */
+static boolean
+nvc0_compute_state_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push;
+   int err;
+
+   if (!nvc0_compute_validate_program(nvc0))
+      return FALSE;
+
+   /* TODO: textures, samplers, surfaces, global memory buffers */
+
+   push = nvc0->base.pushbuf;
+
+   nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+   nouveau_pushbuf_bufctx(push, nvc0->bufctx_cp);
+
+   err = nouveau_pushbuf_validate(push);
+   if (unlikely(err))
+      return FALSE;
+
+   /* fence again if the state was flagged as flushed during validation */
+   if (unlikely(nvc0->state.flushed))
+      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+
+   return TRUE;
+}
+
+/*
+ * Upload the kernel input parameters into the screen's parameter buffer.
+ *
+ * Nothing is emitted when the bound compute program has parm_size == 0.
+ * Otherwise the buffer is bound with its size rounded up to 0x100, the
+ * parm_size / 4 words at @input are written starting at position 0, and a
+ * constant buffer flush is emitted.
+ */
+static void
+nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
+{
+   struct nouveau_pushbuf *pb = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   struct nvc0_program *prog = nvc0->compprog;
+
+   if (!prog->parm_size)
+      return;
+
+   /* bind the parameter buffer as constant buffer index 0 */
+   BEGIN_NVC0(pb, NVC0_COMPUTE(CB_SIZE), 3);
+   PUSH_DATA (pb, align(prog->parm_size, 0x100));
+   PUSH_DATAh(pb, screen->parm->offset);
+   PUSH_DATA (pb, screen->parm->offset);
+   BEGIN_NVC0(pb, NVC0_COMPUTE(CB_BIND), 1);
+   PUSH_DATA (pb, (0 << NVC0_COMPUTE_CB_BIND_INDEX__SHIFT) |
+                  NVC0_COMPUTE_CB_BIND_VALID);
+
+   /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
+   BEGIN_1IC0(pb, NVC0_COMPUTE(CB_POS), 1 + prog->parm_size / 4);
+   PUSH_DATA (pb, 0);
+   PUSH_DATAp(pb, input, prog->parm_size / 4);
+
+   BEGIN_NVC0(pb, NVC0_COMPUTE(FLUSH), 1);
+   PUSH_DATA (pb, NVC0_COMPUTE_FLUSH_CB);
+}
+
+/*
+ * Launch the bound compute program.
+ *
+ * @block_layout and @grid_layout each supply three dimensions (x, y, z);
+ * @label selects the entry point through nvc0_program_symbol_offset() and
+ * @input points at the kernel parameters. If state validation fails, an
+ * error is logged and nothing is emitted.
+ */
+void
+nvc0_launch_grid(struct pipe_context *pipe,
+                 const uint *block_layout, const uint *grid_layout,
+                 uint32_t label,
+                 const void *input)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_program *cp = nvc0->compprog;
+   unsigned stage, slot;
+
+   if (!nvc0_compute_state_validate(nvc0)) {
+      NOUVEAU_ERR("Failed to launch grid !\n");
+      return;
+   }
+
+   nvc0_compute_upload_input(nvc0, input);
+
+   /* entry point of the kernel */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
+   PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
+
+   /* local memory allocation */
+   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
+   PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
+
+   /* shared memory size, threads per block and barrier count */
+   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
+   PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
+   PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
+   PUSH_DATA (push, cp->num_barriers);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
+   PUSH_DATA (push, cp->num_gprs);
+
+   /* grid/block setup: y goes in the high half-word, x in the low one */
+   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
+   PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
+   PUSH_DATA (push, grid_layout[2]);
+   BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
+   PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
+   PUSH_DATA (push, block_layout[2]);
+
+   /* launch preliminary setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
+   PUSH_DATA (push, 0x1);
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
+
+   /* kernel launching */
+   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
+   PUSH_DATA (push, 0x1000);
+   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
+   PUSH_DATA (push, 0x1);
+
+   /* Binding a CB on COMPUTE appears to clobber 3D state, so flag every
+    * constant buffer slot that has a buffer bound as dirty and forget
+    * which uniform buffers were bound. */
+   nvc0->dirty |= NVC0_NEW_CONSTBUF;
+   for (stage = 0; stage < 6; stage++) {
+      for (slot = 0; slot < NVC0_MAX_PIPE_CONSTBUFS; slot++) {
+         if (!nvc0->constbuf[stage][slot].u.buf)
+            continue;
+         nvc0->constbuf_dirty[stage] |= 1 << slot;
+      }
+   }
+   memset(nvc0->state.uniform_buffer_bound, 0,
+          sizeof(nvc0->state.uniform_buffer_bound));
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_compute.h b/src/gallium/drivers/nvc0/nvc0_compute.h
new file mode 100644
index 00000000000..f2df7bed310
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_compute.h
@@ -0,0 +1,10 @@
+#ifndef NVC0_COMPUTE_H
+#define NVC0_COMPUTE_H
+
+#include "nv50/nv50_defs.xml.h"
+#include "nvc0_compute.xml.h"
+
+/* Forward declaration so the prototype below does not rely on the includer
+ * having pulled in nvc0_context.h first. */
+struct nvc0_context;
+
+/* Translate (if not done yet) and upload the compute program bound to the
+ * context. Returns TRUE when the program has code memory, FALSE on
+ * translation or upload failure. */
+boolean
+nvc0_compute_validate_program(struct nvc0_context *nvc0);
+
+#endif /* NVC0_COMPUTE_H */
diff --git a/src/gallium/drivers/nvc0/nvc0_compute.xml.h b/src/gallium/drivers/nvc0/nvc0_compute.xml.h
new file mode 100644
index 00000000000..35e6bfdbea2
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_compute.xml.h
@@ -0,0 +1,410 @@
+#ifndef NVC0_COMPUTE_XML
+#define NVC0_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_compute.xml ( 11145 bytes, from 2013-04-27 14:00:13)
+- copyright.xml ( 6452 bytes, from 2013-02-27 22:13:22)
+- nvchipsets.xml ( 3954 bytes, from 2013-04-27 14:00:13)
+- nv_object.xml ( 14395 bytes, from 2013-04-27 14:00:13)
+- nv_defs.xml ( 4437 bytes, from 2013-02-27 22:13:22)
+- nv50_defs.xml ( 16652 bytes, from 2013-06-20 13:45:33)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- imirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_COMPUTE_LOCAL_POS_ALLOC 0x00000204
+
+#define NVC0_COMPUTE_LOCAL_NEG_ALLOC 0x00000208
+
+#define NVC0_COMPUTE_WARP_CSTACK_SIZE 0x0000020c
+
+#define NVC0_COMPUTE_TEX_LIMITS 0x00000210
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007
+
+#define NVC0_COMPUTE_SHARED_BASE 0x00000214
+
+#define NVC0_COMPUTE_MEM_BARRIER 0x0000021c
+#define NVC0_COMPUTE_MEM_BARRIER_UNK0 0x00000001
+#define NVC0_COMPUTE_MEM_BARRIER_UNK1 0x00000002
+#define NVC0_COMPUTE_MEM_BARRIER_UNK2 0x00000004
+#define NVC0_COMPUTE_MEM_BARRIER_UNK4 0x00000010
+#define NVC0_COMPUTE_MEM_BARRIER_UNK8 0x00000100
+#define NVC0_COMPUTE_MEM_BARRIER_UNK12 0x00001000
+
+#define NVC0_COMPUTE_BIND_TSC 0x00000228
+#define NVC0_COMPUTE_BIND_TSC_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TSC_SAMPLER__MASK 0x00000ff0
+#define NVC0_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4
+#define NVC0_COMPUTE_BIND_TSC_TSC__MASK 0x01fff000
+#define NVC0_COMPUTE_BIND_TSC_TSC__SHIFT 12
+
+#define NVC0_COMPUTE_BIND_TIC 0x0000022c
+#define NVC0_COMPUTE_BIND_TIC_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NVC0_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1
+#define NVC0_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NVC0_COMPUTE_BIND_TIC_TIC__SHIFT 9
+
+#define NVC0_COMPUTE_BIND_TSC2 0x00000230
+#define NVC0_COMPUTE_BIND_TSC2_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__MASK 0x00000010
+#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__SHIFT 4
+#define NVC0_COMPUTE_BIND_TSC2_TSC__MASK 0x01fff000
+#define NVC0_COMPUTE_BIND_TSC2_TSC__SHIFT 12
+
+#define NVC0_COMPUTE_BIND_TIC2 0x00000234
+#define NVC0_COMPUTE_BIND_TIC2_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__MASK 0x00000002
+#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__SHIFT 1
+#define NVC0_COMPUTE_BIND_TIC2_TIC__MASK 0x7ffffe00
+#define NVC0_COMPUTE_BIND_TIC2_TIC__SHIFT 9
+
+#define NVC0_COMPUTE_GRIDDIM_YX 0x00000238
+#define NVC0_COMPUTE_GRIDDIM_YX_X__MASK 0x0000ffff
+#define NVC0_COMPUTE_GRIDDIM_YX_X__SHIFT 0
+#define NVC0_COMPUTE_GRIDDIM_YX_Y__MASK 0xffff0000
+#define NVC0_COMPUTE_GRIDDIM_YX_Y__SHIFT 16
+
+#define NVC0_COMPUTE_GRIDDIM_Z 0x0000023c
+
+#define NVC0_COMPUTE_UNK244_TIC_FLUSH 0x00000244
+
+#define NVC0_COMPUTE_SHARED_SIZE 0x0000024c
+
+#define NVC0_COMPUTE_THREADS_ALLOC 0x00000250
+
+#define NVC0_COMPUTE_BARRIER_ALLOC 0x00000254
+
+#define NVC0_COMPUTE_UNK028C 0x0000028c
+
+#define NVC0_COMPUTE_COMPUTE_BEGIN 0x0000029c
+#define NVC0_COMPUTE_COMPUTE_BEGIN_UNK0 0x00000001
+
+#define NVC0_COMPUTE_UNK02A0 0x000002a0
+
+#define NVC0_COMPUTE_CP_GPR_ALLOC 0x000002c0
+
+#define NVC0_COMPUTE_UNK02C4 0x000002c4
+
+#define NVC0_COMPUTE_GLOBAL_BASE 0x000002c8
+#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__MASK 0x000000ff
+#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__SHIFT 0
+#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__MASK 0x00ff0000
+#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__SHIFT 16
+#define NVC0_COMPUTE_GLOBAL_BASE_READ_OK 0x40000000
+#define NVC0_COMPUTE_GLOBAL_BASE_WRITE_OK 0x80000000
+
+#define NVC8_COMPUTE_UNK02E0 0x000002e0
+
+#define NVC0_COMPUTE_CACHE_SPLIT 0x00000308
+#define NVC0_COMPUTE_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001
+#define NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003
+
+#define NVC0_COMPUTE_UNK030C 0x0000030c
+
+#define NVC0_COMPUTE_UNK0360 0x00000360
+#define NVC0_COMPUTE_UNK0360_UNK0 0x00000001
+#define NVC0_COMPUTE_UNK0360_UNK8__MASK 0x00000300
+#define NVC0_COMPUTE_UNK0360_UNK8__SHIFT 8
+#define NVC8_COMPUTE_UNK0360_UNK10__MASK 0x00000c00
+#define NVC8_COMPUTE_UNK0360_UNK10__SHIFT 10
+
+#define NVC0_COMPUTE_LAUNCH 0x00000368
+
+#define NVC0_COMPUTE_UNK036C 0x0000036c
+#define NVC0_COMPUTE_UNK036C_UNK0__MASK 0x00000003
+#define NVC0_COMPUTE_UNK036C_UNK0__SHIFT 0
+#define NVC8_COMPUTE_UNK036C_UNK2__MASK 0x0000000c
+#define NVC8_COMPUTE_UNK036C_UNK2__SHIFT 2
+
+#define NVC0_COMPUTE_BLOCKDIM_YX 0x000003ac
+#define NVC0_COMPUTE_BLOCKDIM_YX_X__MASK 0x0000ffff
+#define NVC0_COMPUTE_BLOCKDIM_YX_X__SHIFT 0
+#define NVC0_COMPUTE_BLOCKDIM_YX_Y__MASK 0xffff0000
+#define NVC0_COMPUTE_BLOCKDIM_YX_Y__SHIFT 16
+
+#define NVC0_COMPUTE_BLOCKDIM_Z 0x000003b0
+
+#define NVC0_COMPUTE_CP_START_ID 0x000003b4
+
+#define NVC0_COMPUTE_FIRMWARE(i0) (0x00000500 + 0x4*(i0))
+#define NVC0_COMPUTE_FIRMWARE__ESIZE 0x00000004
+#define NVC0_COMPUTE_FIRMWARE__LEN 0x00000020
+
+#define NVC0_COMPUTE_MP_LIMIT 0x00000758
+
+#define NVC0_COMPUTE_LOCAL_BASE 0x0000077c
+
+#define NVC0_COMPUTE_GRIDID 0x00000780
+
+#define NVC0_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790
+
+#define NVC0_COMPUTE_TEMP_ADDRESS_LOW 0x00000794
+
+#define NVC0_COMPUTE_TEMP_SIZE_HIGH 0x00000798
+
+#define NVC0_COMPUTE_TEMP_SIZE_LOW 0x0000079c
+
+#define NVC0_COMPUTE_WARP_TEMP_ALLOC 0x000007a0
+
+#define NVC0_COMPUTE_COMPUTE_END 0x00000a04
+#define NVC0_COMPUTE_COMPUTE_END_UNK0 0x00000001
+
+#define NVC0_COMPUTE_UNK0A08 0x00000a08
+
+#define NVC0_COMPUTE_CALL_LIMIT_LOG 0x00000d64
+
+#define NVC0_COMPUTE_UNK0D94 0x00000d94
+
+#define NVC0_COMPUTE_WATCHDOG_TIMER 0x00000de4
+
+#define NVC0_COMPUTE_UNK10F4 0x000010f4
+#define NVC0_COMPUTE_UNK10F4_UNK0 0x00000001
+#define NVC0_COMPUTE_UNK10F4_UNK4 0x00000010
+#define NVC0_COMPUTE_UNK10F4_UNK8 0x00000100
+
+#define NVC0_COMPUTE_LINKED_TSC 0x00001234
+
+#define NVC0_COMPUTE_UNK1288_TIC_FLUSH 0x00001288
+
+#define NVC0_COMPUTE_UNK12AC 0x000012ac
+
+#define NVC0_COMPUTE_TSC_FLUSH 0x00001330
+#define NVC0_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_COMPUTE_TIC_FLUSH 0x00001334
+#define NVC0_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_COMPUTE_TEX_CACHE_CTL 0x00001338
+#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__MASK 0x00000007
+#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__SHIFT 0
+#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0
+#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4
+
+#define NVC0_COMPUTE_UNK1354 0x00001354
+
+#define NVC0_COMPUTE_UNK1424_TSC_FLUSH 0x00001424
+
+#define NVC0_COMPUTE_COND_ADDRESS_HIGH 0x00001550
+
+#define NVC0_COMPUTE_COND_ADDRESS_LOW 0x00001554
+
+#define NVC0_COMPUTE_COND_MODE 0x00001558
+#define NVC0_COMPUTE_COND_MODE_NEVER 0x00000000
+#define NVC0_COMPUTE_COND_MODE_ALWAYS 0x00000001
+#define NVC0_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_COMPUTE_COND_MODE_EQUAL 0x00000003
+#define NVC0_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NVC0_COMPUTE_TSC_ADDRESS_LOW 0x00001560
+
+#define NVC0_COMPUTE_TSC_LIMIT 0x00001564
+
+#define NVC0_COMPUTE_TIC_ADDRESS_HIGH 0x00001574
+
+#define NVC0_COMPUTE_TIC_ADDRESS_LOW 0x00001578
+
+#define NVC0_COMPUTE_TIC_LIMIT 0x0000157c
+
+#define NVC0_COMPUTE_CODE_ADDRESS_HIGH 0x00001608
+
+#define NVC0_COMPUTE_CODE_ADDRESS_LOW 0x0000160c
+
+#define NVC0_COMPUTE_TEX_MISC 0x00001664
+#define NVC0_COMPUTE_TEX_MISC_UNK 0x00000001
+#define NVC0_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002
+
+#define NVC0_COMPUTE_UNK1690 0x00001690
+#define NVC0_COMPUTE_UNK1690_ALWAYS_DERIV 0x00000001
+#define NVC0_COMPUTE_UNK1690_UNK16 0x00010000
+
+#define NVC0_COMPUTE_CB_BIND 0x00001694
+#define NVC0_COMPUTE_CB_BIND_VALID 0x00000001
+#define NVC0_COMPUTE_CB_BIND_INDEX__MASK 0x00001f00
+#define NVC0_COMPUTE_CB_BIND_INDEX__SHIFT 8
+
+#define NVC0_COMPUTE_FLUSH 0x00001698
+#define NVC0_COMPUTE_FLUSH_CODE 0x00000001
+#define NVC0_COMPUTE_FLUSH_GLOBAL 0x00000010
+#define NVC0_COMPUTE_FLUSH_UNK8 0x00000100
+#define NVC0_COMPUTE_FLUSH_CB 0x00001000
+
+#define NVC0_COMPUTE_UNK1930 0x00001930
+
+#define NVC0_COMPUTE_UNK1944 0x00001944
+
+#define NVC0_COMPUTE_DELAY 0x00001a24
+
+#define NVC0_COMPUTE_UNK1A2C(i0) (0x00001a2c + 0x4*(i0))
+#define NVC0_COMPUTE_UNK1A2C__ESIZE 0x00000004
+#define NVC0_COMPUTE_UNK1A2C__LEN 0x00000005
+
+#define NVC0_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NVC0_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NVC0_COMPUTE_QUERY_SEQUENCE 0x00001b08
+
+#define NVC0_COMPUTE_QUERY_GET 0x00001b0c
+#define NVC0_COMPUTE_QUERY_GET_MODE__MASK 0x00000003
+#define NVC0_COMPUTE_QUERY_GET_MODE__SHIFT 0
+#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000
+#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003
+#define NVC0_COMPUTE_QUERY_GET_INTR 0x00100000
+#define NVC0_COMPUTE_QUERY_GET_SHORT 0x10000000
+
+#define NVC0_COMPUTE_CB_SIZE 0x00002380
+
+#define NVC0_COMPUTE_CB_ADDRESS_HIGH 0x00002384
+
+#define NVC0_COMPUTE_CB_ADDRESS_LOW 0x00002388
+
+#define NVC0_COMPUTE_CB_POS 0x0000238c
+
+#define NVC0_COMPUTE_CB_DATA(i0) (0x00002390 + 0x4*(i0))
+#define NVC0_COMPUTE_CB_DATA__ESIZE 0x00000004
+#define NVC0_COMPUTE_CB_DATA__LEN 0x00000010
+
+#define NVC0_COMPUTE_IMAGE(i0) (0x00002700 + 0x20*(i0))
+#define NVC0_COMPUTE_IMAGE__ESIZE 0x00000020
+#define NVC0_COMPUTE_IMAGE__LEN 0x00000008
+
+#define NVC0_COMPUTE_IMAGE_ADDRESS_HIGH(i0) (0x00002700 + 0x20*(i0))
+
+#define NVC0_COMPUTE_IMAGE_ADDRESS_LOW(i0) (0x00002704 + 0x20*(i0))
+
+#define NVC0_COMPUTE_IMAGE_WIDTH(i0) (0x00002708 + 0x20*(i0))
+
+#define NVC0_COMPUTE_IMAGE_HEIGHT(i0) (0x0000270c + 0x20*(i0))
+#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__MASK 0x0000ffff
+#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__SHIFT 0
+#define NVC0_COMPUTE_IMAGE_HEIGHT_UNK16 0x00010000
+#define NVC0_COMPUTE_IMAGE_HEIGHT_LINEAR 0x00100000
+
+#define NVC0_COMPUTE_IMAGE_FORMAT(i0) (0x00002710 + 0x20*(i0))
+#define NVC0_COMPUTE_IMAGE_FORMAT_UNK0 0x00000001
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__MASK 0x00000ff0
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__SHIFT 4
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__MASK 0x0001f000
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__SHIFT 12
+
+#define NVC0_COMPUTE_IMAGE_TILE_MODE(i0) (0x00002714 + 0x20*(i0))
+
+#define NVC0_COMPUTE_MP_PM_SET(i0) (0x0000335c + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_SET__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_SET__LEN 0x00000008
+
+#define NVC0_COMPUTE_MP_PM_SIGSEL(i0) (0x0000337c + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_SIGSEL__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_SIGSEL__LEN 0x00000008
+
+#define NVC0_COMPUTE_MP_PM_SRCSEL(i0) (0x0000339c + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_SRCSEL__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_SRCSEL__LEN 0x00000008
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__MASK 0x00000007
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__SHIFT 0
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__MASK 0x00000070
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__SHIFT 4
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__MASK 0x00000700
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__SHIFT 8
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__MASK 0x00007000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__SHIFT 12
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__MASK 0x00070000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__SHIFT 16
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__MASK 0x00700000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__SHIFT 20
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__MASK 0x07000000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__SHIFT 24
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__MASK 0x70000000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__SHIFT 28
+
+#define NVC0_COMPUTE_MP_PM_OP(i0) (0x000033bc + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_OP__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_OP__LEN 0x00000008
+#define NVC0_COMPUTE_MP_PM_OP_MODE__MASK 0x00000001
+#define NVC0_COMPUTE_MP_PM_OP_MODE__SHIFT 0
+#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP 0x00000000
+#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP_PULSE 0x00000001
+#define NVC0_COMPUTE_MP_PM_OP_FUNC__MASK 0x000ffff0
+#define NVC0_COMPUTE_MP_PM_OP_FUNC__SHIFT 4
+
+#define NVC0_COMPUTE_MP_PM_UNK33DC 0x000033dc
+
+
+#endif /* NVC0_COMPUTE_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 8cd5fc5a646..69e1970b64e 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -258,8 +258,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
pipe->draw_vbo = nvc0_draw_vbo;
pipe->clear = nvc0_clear;
- if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
- pipe->launch_grid = nve4_launch_grid;
+ pipe->launch_grid = (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) ?
+ nve4_launch_grid : nvc0_launch_grid;
pipe->flush = nvc0_flush;
pipe->texture_barrier = nvc0_texture_barrier;
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index 0431b89e151..9e589602964 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -358,4 +358,8 @@ void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
void nve4_launch_grid(struct pipe_context *,
const uint *, const uint *, uint32_t, const void *);
+/* nvc0_compute.c */
+void nvc0_launch_grid(struct pipe_context *,
+ const uint *, const uint *, uint32_t, const void *);
+
#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index 171a30256a4..bc5580bdadc 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -489,6 +489,11 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
switch (screen->base.device->chipset & 0xf0) {
case 0xc0:
case 0xd0:
+ /* Using COMPUTE has weird effects on 3D state, we need to
+ * investigate this further before enabling it by default.
+ */
+ if (debug_get_bool_option("NVC0_COMPUTE", FALSE))
+ return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
return 0;
case 0xe0:
case 0xf0:
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index ed43696ee8b..54ff6221385 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -215,6 +215,7 @@ int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
+int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
uint32_t lneg, uint32_t cstack);
diff --git a/src/gallium/drivers/nvc0/nve4_compute.c b/src/gallium/drivers/nvc0/nve4_compute.c
index abadd601a68..8b39f7342b4 100644
--- a/src/gallium/drivers/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nvc0/nve4_compute.c
@@ -23,6 +23,7 @@
*/
#include "nvc0_context.h"
+#include "nvc0_compute.h"
#include "nve4_compute.h"
#include "nv50/codegen/nv50_ir_driver.h"
@@ -298,38 +299,9 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
static boolean
-nve4_compute_validate_program(struct nvc0_context *nvc0)
-{
- struct nvc0_program *prog = nvc0->compprog;
-
- if (prog->mem)
- return TRUE;
-
- if (!prog->translated) {
- prog->translated = nvc0_program_translate(
- prog, nvc0->screen->base.device->chipset);
- if (!prog->translated)
- return FALSE;
- }
- if (unlikely(!prog->code_size))
- return FALSE;
-
- if (likely(prog->code_size)) {
- if (nvc0_program_upload_code(nvc0, prog)) {
- struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
- PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CODE);
- return TRUE;
- }
- }
- return FALSE;
-}
-
-
-static boolean
nve4_compute_state_validate(struct nvc0_context *nvc0)
{
- if (!nve4_compute_validate_program(nvc0))
+ if (!nvc0_compute_validate_program(nvc0))
return FALSE;
if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
nve4_compute_validate_textures(nvc0);