summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nouveau
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nouveau')
-rw-r--r--src/gallium/drivers/nouveau/Makefile.sources6
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp46
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.c8
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.c320
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h444
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c45
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h24
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c27
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.h9
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_push.c42
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.c77
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.h6
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw.c47
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw.h16
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c207
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h34
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c417
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h45
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c65
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.h19
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c99
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c18
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c15
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_winsys.h1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c17
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.c6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c12
30 files changed, 2026 insertions, 55 deletions
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 83f81135590..31a93659647 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -64,6 +64,8 @@ NV50_C_SOURCES := \
nv50/nv50_3ddefs.xml.h \
nv50/nv50_3d.xml.h \
nv50/nv50_blit.h \
+ nv50/nv50_compute.c \
+ nv50/nv50_compute.xml.h \
nv50/nv50_context.c \
nv50/nv50_context.h \
nv50/nv50_defs.xml.h \
@@ -76,6 +78,10 @@ NV50_C_SOURCES := \
nv50/nv50_query.h \
nv50/nv50_query_hw.c \
nv50/nv50_query_hw.h \
+ nv50/nv50_query_hw_metric.c \
+ nv50/nv50_query_hw_metric.h \
+ nv50/nv50_query_hw_sm.c \
+ nv50/nv50_query_hw_sm.h \
nv50/nv50_resource.c \
nv50/nv50_resource.h \
nv50/nv50_screen.c \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 2a13e1086a0..9f84de03a4a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2357,6 +2357,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
case OP_PFETCH:
emitPFETCH(insn);
break;
+ case OP_AFETCH:
+ emitAFETCH(insn);
+ break;
case OP_EMIT:
case OP_RESTART:
emitOUT(insn);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 7859c8e79bd..41d2cc9167c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1573,10 +1573,28 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
Instruction *st;
if (slot->reg.file == FILE_MEMORY_LOCAL) {
- st = new_Instruction(func, OP_STORE, ty);
- st->setSrc(0, slot);
- st->setSrc(1, lval);
lval->noSpill = 1;
+ if (ty != TYPE_B96) {
+ st = new_Instruction(func, OP_STORE, ty);
+ st->setSrc(0, slot);
+ st->setSrc(1, lval);
+ } else {
+ st = new_Instruction(func, OP_SPLIT, ty);
+ st->setSrc(0, lval);
+ for (int d = 0; d < lval->reg.size / 4; ++d)
+ st->setDef(d, new_LValue(func, FILE_GPR));
+
+ for (int d = lval->reg.size / 4 - 1; d >= 0; --d) {
+ Value *tmp = cloneShallow(func, slot);
+ tmp->reg.size = 4;
+ tmp->reg.data.offset += 4 * d;
+
+ Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32);
+ s->setSrc(0, tmp);
+ s->setSrc(1, st->getDef(d));
+ defi->bb->insertAfter(defi, s);
+ }
+ }
} else {
st = new_Instruction(func, OP_CVT, ty);
st->setDef(0, slot);
@@ -1596,7 +1614,27 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
Instruction *ld;
if (slot->reg.file == FILE_MEMORY_LOCAL) {
lval->noSpill = 1;
- ld = new_Instruction(func, OP_LOAD, ty);
+ if (ty != TYPE_B96) {
+ ld = new_Instruction(func, OP_LOAD, ty);
+ } else {
+ ld = new_Instruction(func, OP_MERGE, ty);
+ for (int d = 0; d < lval->reg.size / 4; ++d) {
+ Value *tmp = cloneShallow(func, slot);
+ LValue *val;
+ tmp->reg.size = 4;
+ tmp->reg.data.offset += 4 * d;
+
+ Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32);
+ l->setDef(0, (val = new_LValue(func, FILE_GPR)));
+ l->setSrc(0, tmp);
+ usei->bb->insertBefore(usei, l);
+ ld->setSrc(d, val);
+ val->noSpill = 1;
+ }
+ ld->setDef(0, lval);
+ usei->bb->insertBefore(usei, ld);
+ return lval;
+ }
} else {
ld = new_Instruction(func, OP_CVT, ty);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 68e69beb08f..1695553d793 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -657,8 +657,8 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
buffer->domain = NOUVEAU_BO_GART;
- } else if (buffer->base.bind &
- (screen->vidmem_bindings & screen->sysmem_bindings)) {
+ } else if (buffer->base.bind == 0 || (buffer->base.bind &
+ (screen->vidmem_bindings & screen->sysmem_bindings))) {
switch (buffer->base.usage) {
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
@@ -685,6 +685,10 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
if (buffer->base.bind & screen->sysmem_bindings)
buffer->domain = NOUVEAU_BO_GART;
}
+ /* There can be very special situations where we want non-gpu-mapped
+ * buffers, but never through this interface.
+ */
+ assert(buffer->domain);
ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
if (ret == false)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
new file mode 100644
index 00000000000..6d23fd66945
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2012 Francisco Jerez
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_compute.xml.h"
+
+#include "codegen/nv50_ir_driver.h"
+
+/**
+ * Create the compute object on the screen's channel and emit its initial
+ * state into \p push.
+ *
+ * The object class is chosen from the device chipset, the object is stored
+ * in screen->compute and bound to the compute subchannel, and then the DMA
+ * objects, stack, global slots, texture/sampler tables and local memory
+ * window are programmed.
+ *
+ * \param screen  screen providing the device, channel and backing buffers
+ * \param push    push buffer the setup methods are written to
+ * \return 0 on success, -1 for an unsupported chipset, or the non-zero
+ *         value returned by nouveau_object_new() on failure
+ */
+int
+nv50_screen_compute_setup(struct nv50_screen *screen,
+                          struct nouveau_pushbuf *push)
+{
+   struct nouveau_device *dev = screen->base.device;
+   struct nouveau_object *chan = screen->base.channel;
+   struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
+   unsigned obj_class;
+   int slot, ret;
+
+   switch (dev->chipset & 0xf0) {
+   case 0x50:
+   case 0x80:
+   case 0x90:
+   case 0xa0:
+      /* Only chipsets 0xa3, 0xa5 and 0xa8 use the NVA3 class; every other
+       * member of these families uses the NV50 class.
+       */
+      if (dev->chipset == 0xa3 ||
+          dev->chipset == 0xa5 ||
+          dev->chipset == 0xa8)
+         obj_class = NVA3_COMPUTE_CLASS;
+      else
+         obj_class = NV50_COMPUTE_CLASS;
+      break;
+   default:
+      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+      return -1;
+   }
+
+   ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
+                            &screen->compute);
+   if (ret)
+      return ret;
+
+   /* Bind the freshly created object to the compute subchannel. */
+   BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+   PUSH_DATA (push, screen->compute->handle);
+
+   /* Stack: backed by screen->stack_bo. */
+   BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->stack_bo->offset);
+   PUSH_DATA (push, screen->stack_bo->offset);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
+   PUSH_DATA (push, 4);
+
+   BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
+   PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
+   BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
+   PUSH_DATA (push, 0x100);
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
+   PUSH_DATA (push, fifo->vram);
+
+   /* All 16 global slots start at address 0 in linear mode.  Slots 0..14
+    * get a limit of 0, while the last slot gets a limit of ~0.
+    */
+   for (slot = 0; slot < 16; slot++) {
+      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(slot)), 2);
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 0);
+      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(slot)), 1);
+      PUSH_DATA (push, slot == 15 ? ~0 : 0);
+      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(slot)), 1);
+      PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+   }
+
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
+   PUSH_DATA (push, 7);
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
+   PUSH_DATA (push, 7);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+   PUSH_DATA (push, 0);
+
+   /* Texturing.  0x54 places 4 in the SAMPLERS_LOG2 field and 5 in the
+    * TEXTURES_LOG2 field of TEX_LIMITS.
+    */
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
+   PUSH_DATA (push, 0x54);
+   BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
+   PUSH_DATA (push, 0);
+
+   /* The TIC table sits at the start of screen->txc ... */
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
+   PUSH_DATAh(push, screen->txc->offset);
+   PUSH_DATA (push, screen->txc->offset);
+   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
+
+   /* ... and the TSC table 65536 bytes into the same buffer. */
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
+   PUSH_DATAh(push, screen->txc->offset + 65536);
+   PUSH_DATA (push, screen->txc->offset + 65536);
+   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
+
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
+   PUSH_DATA (push, fifo->vram);
+
+   /* Local memory: window starts 65536 bytes into screen->tls_bo. */
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->tls_bo->offset + 65536);
+   PUSH_DATA (push, screen->tls_bo->offset + 65536);
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
+   PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
+
+   return 0;
+}
+
+/**
+ * Make sure the bound compute program is translated and uploaded.
+ *
+ * A program that already has code memory (prog->mem) is accepted as is.
+ * Otherwise it is translated if that has not happened yet, uploaded, and a
+ * CODE_CB_FLUSH method is emitted.
+ *
+ * \return true if the program is usable, false if translation failed, the
+ *         program has no code, or the upload failed
+ */
+static bool
+nv50_compute_validate_program(struct nv50_context *nv50)
+{
+   struct nv50_program *prog = nv50->compprog;
+   struct nouveau_pushbuf *push;
+
+   if (prog->mem)
+      return true;
+
+   if (!prog->translated) {
+      prog->translated = nv50_program_translate(
+         prog, nv50->screen->base.device->chipset, &nv50->base.debug);
+      if (!prog->translated)
+         return false;
+   }
+
+   /* Nothing to upload for an empty program. */
+   if (unlikely(!prog->code_size))
+      return false;
+
+   if (!nv50_program_upload_code(nv50, prog))
+      return false;
+
+   push = nv50->base.pushbuf;
+   BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
+   PUSH_DATA (push, 0);
+   return true;
+}
+
+/**
+ * Add every non-NULL entry of nv50->global_residents to the compute buffer
+ * context under the NV50_BIND_CP_GLOBAL bin with read/write access.
+ */
+static void
+nv50_compute_validate_globals(struct nv50_context *nv50)
+{
+   const unsigned count =
+      nv50->global_residents.size / sizeof(struct pipe_resource *);
+   unsigned idx;
+
+   for (idx = 0; idx < count; idx++) {
+      struct pipe_resource **slot = util_dynarray_element(
+         &nv50->global_residents, struct pipe_resource *, idx);
+
+      /* Empty slots are simply skipped. */
+      if (!*slot)
+         continue;
+
+      nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
+                               nv04_resource(*slot), NOUVEAU_BO_RDWR);
+   }
+}
+
+/**
+ * Validate all compute state needed before a grid launch.
+ *
+ * Validates the program, re-adds the global residents when they are marked
+ * dirty, fences the compute buffer context and validates the push buffer
+ * against it.
+ *
+ * \return true on success, false if the program or the push buffer could
+ *         not be validated
+ */
+static bool
+nv50_compute_state_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+   if (!nv50_compute_validate_program(nv50))
+      return false;
+
+   if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS)
+      nv50_compute_validate_globals(nv50);
+
+   /* TODO: validate textures, samplers, surfaces */
+
+   nv50_bufctx_fence(nv50->bufctx_cp, false);
+
+   nouveau_pushbuf_bufctx(push, nv50->bufctx_cp);
+   if (unlikely(nouveau_pushbuf_validate(push) != 0))
+      return false;
+
+   /* Fence the buffer context again if state.flushed is set after
+    * push buffer validation.
+    */
+   if (unlikely(nv50->state.flushed))
+      nv50_bufctx_fence(nv50->bufctx_cp, true);
+
+   return true;
+}
+
+/**
+ * Upload the kernel input (user parameters) for the bound compute program.
+ *
+ * The parameter size is the program's parm_size rounded up to a multiple of
+ * 4 bytes.  The dword count is always written to USER_PARAM_COUNT; when it
+ * is non-zero the data is copied into a temporary GART allocation and
+ * referenced from the push buffer as the USER_PARAM payload.
+ *
+ * \param nv50   context whose compprog supplies the parameter size
+ * \param input  source of the parameter data; `size` bytes are read from it
+ *
+ * NOTE(review): allocation failure is only caught by assert(), and the
+ * result of nouveau_bo_map() is not checked.
+ */
+static void
+nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
+{
+   struct nv50_screen *screen = nv50->screen;
+   struct nouveau_pushbuf *push = screen->base.pushbuf;
+   const unsigned size = align(nv50->compprog->parm_size, 0x4);
+   const unsigned num_dwords = size / 4;
+   struct nouveau_mm_allocation *mm;
+   struct nouveau_bo *bo = NULL;
+   unsigned offset;
+
+   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+   PUSH_DATA (push, num_dwords << 8);
+
+   if (!size)
+      return;
+
+   /* Stage the parameters in a temporary GART sub-allocation. */
+   mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
+   assert(mm);
+
+   nouveau_bo_map(bo, 0, screen->base.client);
+   memcpy(bo->map + offset, input, size);
+
+   /* Reference the staging buffer for reading and validate the push
+    * buffer against the general-purpose buffer context.
+    */
+   nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+   nouveau_pushbuf_bufctx(push, nv50->bufctx);
+   nouveau_pushbuf_validate(push);
+
+   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), num_dwords);
+   nouveau_pushbuf_data(push, bo, offset, size);
+
+   /* Release the allocation through fence work on the current fence, then
+    * drop the local reference and reset bin 0 of the buffer context.
+    */
+   nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
+   nouveau_bo_ref(NULL, &bo);
+   nouveau_bufctx_reset(nv50->bufctx, 0);
+}
+
+/**
+ * Resolve a kernel label to a code address.
+ *
+ * Searches the bound compute program's symbol table for the first entry
+ * whose label equals \p label.
+ *
+ * \return prog->code_base plus the matching symbol's offset, or just
+ *         prog->code_base when there are no symbols or none matches
+ */
+static uint32_t
+nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
+{
+   struct nv50_program *prog = nv50->compprog;
+   const struct nv50_ir_prog_symbol *table =
+      (const struct nv50_ir_prog_symbol *)prog->cp.syms;
+   uint32_t sym_offset = 0;
+   unsigned idx;
+
+   for (idx = 0; idx < prog->cp.num_syms; idx++) {
+      if (table[idx].label != label)
+         continue;
+      sym_offset = table[idx].offset;
+      break;
+   }
+
+   return prog->code_base + sym_offset;
+}
+
+/**
+ * Launch a compute grid with the currently bound compute program.
+ *
+ * \param pipe          context to launch on
+ * \param block_layout  three block dimensions (x, y, z)
+ * \param grid_layout   grid dimensions; only elements 0 and 1 are read here
+ * \param label         symbol label of the kernel entry point
+ * \param input         kernel parameter data, see nv50_compute_upload_input()
+ *
+ * If state validation fails an error is logged and nothing is launched.
+ */
+void
+nv50_launch_grid(struct pipe_context *pipe,
+                 const uint *block_layout, const uint *grid_layout,
+                 uint32_t label, const void *input)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_program *cp = nv50->compprog;
+   const unsigned threads_per_block =
+      block_layout[0] * block_layout[1] * block_layout[2];
+
+   if (!nv50_compute_state_validate(nv50)) {
+      NOUVEAU_ERR("Failed to launch grid !\n");
+      return;
+   }
+
+   nv50_compute_upload_input(nv50, input);
+
+   BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
+   PUSH_DATA (push, nv50_compute_find_symbol(nv50, label));
+
+   /* Shared size covers the program's shared memory, its parameters and
+    * 0x10 extra bytes, rounded up to a multiple of 0x40.
+    */
+   BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
+   PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
+   BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
+   PUSH_DATA (push, cp->max_gpr);
+
+   /* grid/block setup */
+   BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
+   PUSH_DATA (push, (block_layout[1] << NV50_COMPUTE_BLOCKDIM_XY_Y__SHIFT) |
+                    block_layout[0]);
+   PUSH_DATA (push, block_layout[2]);
+   BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
+   PUSH_DATA (push, (1 << NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__SHIFT) |
+                    threads_per_block);
+   BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
+   PUSH_DATA (push, (grid_layout[1] << NV50_COMPUTE_GRIDDIM_Y__SHIFT) |
+                    grid_layout[0]);
+   BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
+   PUSH_DATA (push, 1);
+
+   /* kernel launching */
+   BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+   PUSH_DATA (push, 0);
+
+   /* binding a compute shader clobbers fragment shader state */
+   nv50->dirty |= NV50_NEW_FRAGPROG;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h
new file mode 100644
index 00000000000..268d11253b6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h
@@ -0,0 +1,444 @@
+#ifndef NV50_COMPUTE_XML
+#define NV50_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/graph/g80_compute.xml ( 14027 bytes, from 2015-02-14 02:01:36)
+- rnndb/copyright.xml ( 6456 bytes, from 2015-02-14 02:01:36)
+- rnndb/nvchipsets.xml ( 2833 bytes, from 2015-04-28 16:28:33)
+- rnndb/fifo/nv_object.xml ( 15390 bytes, from 2015-04-22 20:36:09)
+- rnndb/g80_defs.xml ( 18210 bytes, from 2015-10-19 20:49:59)
+
+Copyright (C) 2006-2015 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- Ilia Mirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NV50_COMPUTE_DMA_NOTIFY 0x00000180
+
+#define NV50_COMPUTE_DMA_GLOBAL 0x000001a0
+
+#define NV50_COMPUTE_DMA_QUERY 0x000001a4
+
+#define NV50_COMPUTE_DMA_LOCAL 0x000001b8
+
+#define NV50_COMPUTE_DMA_STACK 0x000001bc
+
+#define NV50_COMPUTE_DMA_CODE_CB 0x000001c0
+
+#define NV50_COMPUTE_DMA_TSC 0x000001c4
+
+#define NV50_COMPUTE_DMA_TIC 0x000001c8
+
+#define NV50_COMPUTE_DMA_TEXTURE 0x000001cc
+
+#define NV50_COMPUTE_UNK0200 0x00000200
+#define NV50_COMPUTE_UNK0200_UNK1__MASK 0x0000ffff
+#define NV50_COMPUTE_UNK0200_UNK1__SHIFT 0
+#define NV50_COMPUTE_UNK0200_UNK2__MASK 0x00ff0000
+#define NV50_COMPUTE_UNK0200_UNK2__SHIFT 16
+
+#define NV50_COMPUTE_UNK0204 0x00000204
+
+#define NV50_COMPUTE_UNK0208 0x00000208
+
+#define NV50_COMPUTE_UNK020C 0x0000020c
+
+#define NV50_COMPUTE_CP_ADDRESS_HIGH 0x00000210
+
+#define NV50_COMPUTE_CP_ADDRESS_LOW 0x00000214
+
+#define NV50_COMPUTE_STACK_ADDRESS_HIGH 0x00000218
+
+#define NV50_COMPUTE_STACK_ADDRESS_LOW 0x0000021c
+
+#define NV50_COMPUTE_STACK_SIZE_LOG 0x00000220
+
+#define NV50_COMPUTE_CALL_LIMIT_LOG 0x00000224
+
+#define NV50_COMPUTE_UNK0228 0x00000228
+#define NV50_COMPUTE_UNK0228_UNK0 0x00000001
+#define NV50_COMPUTE_UNK0228_UNK4__MASK 0x00000ff0
+#define NV50_COMPUTE_UNK0228_UNK4__SHIFT 4
+#define NV50_COMPUTE_UNK0228_UNK12__MASK 0x000ff000
+#define NV50_COMPUTE_UNK0228_UNK12__SHIFT 12
+
+#define NV50_COMPUTE_TSC_ADDRESS_HIGH 0x0000022c
+
+#define NV50_COMPUTE_TSC_ADDRESS_LOW 0x00000230
+#define NV50_COMPUTE_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NV50_COMPUTE_TSC_LIMIT 0x00000234
+#define NV50_COMPUTE_TSC_LIMIT__MAX 0x00001fff
+
+#define NV50_COMPUTE_CB_ADDR 0x00000238
+#define NV50_COMPUTE_CB_ADDR_ID__MASK 0x003fff00
+#define NV50_COMPUTE_CB_ADDR_ID__SHIFT 8
+#define NV50_COMPUTE_CB_ADDR_BUFFER__MASK 0x0000007f
+#define NV50_COMPUTE_CB_ADDR_BUFFER__SHIFT 0
+
+#define NV50_COMPUTE_CB_DATA(i0) (0x0000023c + 0x4*(i0))
+#define NV50_COMPUTE_CB_DATA__ESIZE 0x00000004
+#define NV50_COMPUTE_CB_DATA__LEN 0x00000010
+
+#define NV50_COMPUTE_TSC_FLUSH 0x0000027c
+#define NV50_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
+#define NV50_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_COMPUTE_TIC_FLUSH 0x00000280
+#define NV50_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
+#define NV50_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_COMPUTE_DELAY1 0x00000284
+
+#define NV50_COMPUTE_WATCHDOG_TIMER 0x00000288
+
+#define NV50_COMPUTE_DELAY2 0x0000028c
+
+#define NV50_COMPUTE_UNK0290 0x00000290
+
+#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH 0x00000294
+
+#define NV50_COMPUTE_LOCAL_ADDRESS_LOW 0x00000298
+#define NV50_COMPUTE_LOCAL_ADDRESS_LOW__ALIGN 0x00000100
+
+#define NV50_COMPUTE_LOCAL_SIZE_LOG 0x0000029c
+
+#define NV50_COMPUTE_UNK02A0 0x000002a0
+
+#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH 0x000002a4
+
+#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW 0x000002a8
+
+#define NV50_COMPUTE_CB_DEF_SET 0x000002ac
+#define NV50_COMPUTE_CB_DEF_SET_SIZE__MASK 0x0000ffff
+#define NV50_COMPUTE_CB_DEF_SET_SIZE__SHIFT 0
+#define NV50_COMPUTE_CB_DEF_SET_BUFFER__MASK 0x007f0000
+#define NV50_COMPUTE_CB_DEF_SET_BUFFER__SHIFT 16
+
+#define NV50_COMPUTE_UNK02B0 0x000002b0
+
+#define NV50_COMPUTE_BLOCK_ALLOC 0x000002b4
+#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__MASK 0x0000ffff
+#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__SHIFT 0
+#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__MASK 0x00ff0000
+#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__SHIFT 16
+
+#define NV50_COMPUTE_LANES32_ENABLE 0x000002b8
+
+#define NV50_COMPUTE_UNK02BC 0x000002bc
+#define NV50_COMPUTE_UNK02BC_UNK1__MASK 0x00000007
+#define NV50_COMPUTE_UNK02BC_UNK1__SHIFT 0
+#define NV50_COMPUTE_UNK02BC_UNK2__MASK 0x00000070
+#define NV50_COMPUTE_UNK02BC_UNK2__SHIFT 4
+
+#define NV50_COMPUTE_CP_REG_ALLOC_TEMP 0x000002c0
+
+#define NV50_COMPUTE_TIC_ADDRESS_HIGH 0x000002c4
+
+#define NV50_COMPUTE_TIC_ADDRESS_LOW 0x000002c8
+
+#define NV50_COMPUTE_TIC_LIMIT 0x000002cc
+
+#define NV50_COMPUTE_MP_PM_SET(i0) (0x000002d0 + 0x4*(i0))
+#define NV50_COMPUTE_MP_PM_SET__ESIZE 0x00000004
+#define NV50_COMPUTE_MP_PM_SET__LEN 0x00000004
+
+#define NV50_COMPUTE_MP_PM_CONTROL(i0) (0x000002e0 + 0x4*(i0))
+#define NV50_COMPUTE_MP_PM_CONTROL__ESIZE 0x00000004
+#define NV50_COMPUTE_MP_PM_CONTROL__LEN 0x00000004
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE__MASK 0x00000001
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE__SHIFT 0
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP 0x00000000
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP_PULSE 0x00000001
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__MASK 0x00000070
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__SHIFT 4
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK0 0x00000000
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK1 0x00000010
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK2 0x00000020
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK3 0x00000030
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK4 0x00000040
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK5 0x00000050
+#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__MASK 0x00ffff00
+#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__SHIFT 8
+#define NV50_COMPUTE_MP_PM_CONTROL_SIG__MASK 0xff000000
+#define NV50_COMPUTE_MP_PM_CONTROL_SIG__SHIFT 24
+
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE 0x000002f0
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_0 0x00000001
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_1 0x00000002
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_2 0x00000004
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_3 0x00000008
+
+#define NV50_COMPUTE_UNK02F4 0x000002f4
+
+#define NV50_COMPUTE_BLOCKDIM_LATCH 0x000002f8
+
+#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC 0x000002fc
+
+#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP 0x00000300
+
+#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC 0x00000304
+
+#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP 0x00000308
+
+#define NV50_COMPUTE_UNK030C 0x0000030c
+
+#define NV50_COMPUTE_QUERY_ADDRESS_HIGH 0x00000310
+
+#define NV50_COMPUTE_QUERY_ADDRESS_LOW 0x00000314
+
+#define NV50_COMPUTE_QUERY_SEQUENCE 0x00000318
+
+#define NV50_COMPUTE_QUERY_GET 0x0000031c
+#define NV50_COMPUTE_QUERY_GET_INTR 0x00000200
+#define NV50_COMPUTE_QUERY_GET_SHORT 0x00008000
+
+#define NV50_COMPUTE_COND_ADDRESS_HIGH 0x00000320
+
+#define NV50_COMPUTE_COND_ADDRESS_LOW 0x00000324
+
+#define NV50_COMPUTE_COND_MODE 0x00000328
+#define NV50_COMPUTE_COND_MODE_NEVER 0x00000000
+#define NV50_COMPUTE_COND_MODE_ALWAYS 0x00000001
+#define NV50_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
+#define NV50_COMPUTE_COND_MODE_EQUAL 0x00000003
+#define NV50_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NV50_COMPUTE_UNK032C 0x0000032c
+
+#define NV50_COMPUTE_UNK0330 0x00000330
+
+#define NV50_COMPUTE_UNK0334(i0) (0x00000334 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0334__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0334__LEN 0x00000003
+
+#define NV50_COMPUTE_UNK0340(i0) (0x00000340 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0340__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0340__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0348(i0) (0x00000348 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0348__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0348__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0350(i0) (0x00000350 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0350__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0350__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0358 0x00000358
+
+#define NV50_COMPUTE_UNK035C 0x0000035c
+
+#define NV50_COMPUTE_UNK0360 0x00000360
+#define NV50_COMPUTE_UNK0360_UNK0__MASK 0x000000f0
+#define NV50_COMPUTE_UNK0360_UNK0__SHIFT 4
+#define NV50_COMPUTE_UNK0360_UNK1__MASK 0x00000f00
+#define NV50_COMPUTE_UNK0360_UNK1__SHIFT 8
+
+#define NV50_COMPUTE_UNK0364 0x00000364
+
+#define NV50_COMPUTE_LAUNCH 0x00000368
+
+#define NV50_COMPUTE_UNK036C 0x0000036c
+
+#define NV50_COMPUTE_UNK0370 0x00000370
+
+#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374
+#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__MASK 0x000000ff
+#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__SHIFT 0
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MASK 0x0000ff00
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__SHIFT 8
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MAX 0x00000040
+
+#define NV50_COMPUTE_LINKED_TSC 0x00000378
+
+#define NV50_COMPUTE_UNK037C 0x0000037c
+#define NV50_COMPUTE_UNK037C_ALWAYS_DERIV 0x00000001
+#define NV50_COMPUTE_UNK037C_UNK16 0x00010000
+
+#define NV50_COMPUTE_CODE_CB_FLUSH 0x00000380
+
+#define NV50_COMPUTE_UNK0384 0x00000384
+
+#define NV50_COMPUTE_GRIDID 0x00000388
+
+#define NV50_COMPUTE_UNK038C(i0) (0x0000038c + 0x4*(i0))
+#define NV50_COMPUTE_UNK038C__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK038C__LEN 0x00000003
+
+#define NV50_COMPUTE_WRCACHE_FLUSH 0x00000398
+
+#define NV50_COMPUTE_UNK039C(i0) (0x0000039c + 0x4*(i0))
+#define NV50_COMPUTE_UNK039C__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK039C__LEN 0x00000002
+
+#define NV50_COMPUTE_GRIDDIM 0x000003a4
+#define NV50_COMPUTE_GRIDDIM_X__MASK 0x0000ffff
+#define NV50_COMPUTE_GRIDDIM_X__SHIFT 0
+#define NV50_COMPUTE_GRIDDIM_Y__MASK 0xffff0000
+#define NV50_COMPUTE_GRIDDIM_Y__SHIFT 16
+
+#define NV50_COMPUTE_SHARED_SIZE 0x000003a8
+#define NV50_COMPUTE_SHARED_SIZE__MAX 0x00004000
+#define NV50_COMPUTE_SHARED_SIZE__ALIGN 0x00000040
+
+#define NV50_COMPUTE_BLOCKDIM_XY 0x000003ac
+#define NV50_COMPUTE_BLOCKDIM_XY_X__MASK 0x0000ffff
+#define NV50_COMPUTE_BLOCKDIM_XY_X__SHIFT 0
+#define NV50_COMPUTE_BLOCKDIM_XY_Y__MASK 0xffff0000
+#define NV50_COMPUTE_BLOCKDIM_XY_Y__SHIFT 16
+
+#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0
+#define NV50_COMPUTE_BLOCKDIM_Z__MIN 0x00000001
+#define NV50_COMPUTE_BLOCKDIM_Z__MAX 0x00000040
+
+#define NV50_COMPUTE_CP_START_ID 0x000003b4
+
+#define NV50_COMPUTE_REG_MODE 0x000003b8
+#define NV50_COMPUTE_REG_MODE_PACKED 0x00000001
+#define NV50_COMPUTE_REG_MODE_STRIPED 0x00000002
+
+#define NV50_COMPUTE_TEX_LIMITS 0x000003bc
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007
+
+#define NV50_COMPUTE_BIND_TSC 0x000003c0
+#define NV50_COMPUTE_BIND_TSC_VALID 0x00000001
+#define NV50_COMPUTE_BIND_TSC_SAMPLER__MASK 0x000000f0
+#define NV50_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4
+#define NV50_COMPUTE_BIND_TSC_TSC__MASK 0x001ff000
+#define NV50_COMPUTE_BIND_TSC_TSC__SHIFT 12
+
+#define NV50_COMPUTE_BIND_TIC 0x000003c4
+#define NV50_COMPUTE_BIND_TIC_VALID 0x00000001
+#define NV50_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NV50_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1
+#define NV50_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NV50_COMPUTE_BIND_TIC_TIC__SHIFT 9
+
+#define NV50_COMPUTE_SET_PROGRAM_CB 0x000003c8
+#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__MASK 0x00000f00
+#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__SHIFT 8
+#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000
+#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__SHIFT 12
+#define NV50_COMPUTE_SET_PROGRAM_CB_VALID 0x000000ff
+
+#define NV50_COMPUTE_UNK03CC 0x000003cc
+
+#define NV50_COMPUTE_TEX_CACHE_CTL 0x000003d0
+#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NV50_COMPUTE_UNK03D4 0x000003d4
+
+#define NV50_COMPUTE_UNK03D8 0x000003d8
+
+#define NV50_COMPUTE_UNK03DC 0x000003dc
+
+#define NV50_COMPUTE_UNK03E0 0x000003e0
+
+#define NV50_COMPUTE_UNK03E4 0x000003e4
+
+#define NVA3_COMPUTE_TEX_MISC 0x000003e8
+#define NVA3_COMPUTE_TEX_MISC_UNK1 0x00000001
+#define NVA3_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002
+
+#define NV50_COMPUTE_GLOBAL(i0) (0x00000400 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL__ESIZE 0x00000020
+#define NV50_COMPUTE_GLOBAL__LEN 0x00000010
+
+#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(i0) (0x00000400 + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(i0) (0x00000404 + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_PITCH(i0) (0x00000408 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL_PITCH__MAX 0x00800000
+#define NV50_COMPUTE_GLOBAL_PITCH__ALIGN 0x00000100
+
+#define NV50_COMPUTE_GLOBAL_LIMIT(i0) (0x0000040c + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_MODE(i0) (0x00000410 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL_MODE_LINEAR 0x00000001
+#define NV50_COMPUTE_GLOBAL_MODE_UNK1__MASK 0x000000f0
+#define NV50_COMPUTE_GLOBAL_MODE_UNK1__SHIFT 4
+#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__MASK 0x00000f00
+#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__SHIFT 8
+
+#define NV50_COMPUTE_USER_PARAM(i0) (0x00000600 + 0x4*(i0))
+#define NV50_COMPUTE_USER_PARAM__ESIZE 0x00000004
+#define NV50_COMPUTE_USER_PARAM__LEN 0x00000040
+
+#define NV50_COMPUTE_UNK0700(i0) (0x00000700 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0700__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0700__LEN 0x00000010
+
+
+#endif /* NV50_COMPUTE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 7867c2df7f3..4874b77b1e1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -113,6 +113,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
nouveau_bufctx_del(&nv50->bufctx_3d);
nouveau_bufctx_del(&nv50->bufctx);
+ nouveau_bufctx_del(&nv50->bufctx_cp);
util_unreference_framebuffer_state(&nv50->framebuffer);
@@ -131,6 +132,14 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
if (!nv50->constbuf[s][i].user)
pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL);
}
+
+ for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource **res = util_dynarray_element(
+ &nv50->global_residents, struct pipe_resource *, i);
+ pipe_resource_reference(res, NULL);
+ }
+ util_dynarray_fini(&nv50->global_residents);
}
static void
@@ -159,9 +168,10 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nv50_context *nv50 = nv50_context(&ctx->pipe);
+ unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ if (bind & PIPE_BIND_RENDER_TARGET) {
assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
if (nv50->framebuffer.cbufs[i] &&
@@ -173,7 +183,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
- if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (nv50->framebuffer.zsbuf &&
nv50->framebuffer.zsbuf->texture == res) {
nv50->dirty |= NV50_NEW_FRAMEBUFFER;
@@ -183,11 +193,11 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
- if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (bind & (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_SAMPLER_VIEW)) {
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
for (i = 0; i < nv50->num_vtxbufs; ++i) {
@@ -263,10 +273,13 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
nv50->base.pushbuf = screen->base.pushbuf;
nv50->base.client = screen->base.client;
- ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT,
- &nv50->bufctx_3d);
+ ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_3D_COUNT,
+ &nv50->bufctx_3d);
if (!ret)
- ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+ ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_CP_COUNT,
+ &nv50->bufctx_cp);
if (ret)
goto out_err;
@@ -290,6 +303,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
pipe->draw_vbo = nv50_draw_vbo;
pipe->clear = nv50_clear;
+ pipe->launch_grid = nv50_launch_grid;
pipe->flush = nv50_flush;
pipe->texture_barrier = nv50_texture_barrier;
@@ -335,19 +349,30 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms);
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc);
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo);
+ if (screen->compute) {
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo);
+ }
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo);
BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
+ if (screen->compute)
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
nv50->base.scratch.bo_size = 2 << 20;
+ util_dynarray_init(&nv50->global_residents);
+
return pipe;
out_err:
if (nv50->bufctx_3d)
nouveau_bufctx_del(&nv50->bufctx_3d);
+ if (nv50->bufctx_cp)
+ nouveau_bufctx_del(&nv50->bufctx_cp);
if (nv50->bufctx)
nouveau_bufctx_del(&nv50->bufctx);
FREE(nv50->blit);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index fb74a9748a3..2cebcd99423 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -49,6 +49,10 @@
#define NV50_NEW_MIN_SAMPLES (1 << 22)
#define NV50_NEW_CONTEXT (1 << 31)
+#define NV50_NEW_CP_PROGRAM (1 << 0)
+#define NV50_NEW_CP_GLOBALS (1 << 1)
+
+/* 3d bufctx (during draw_vbo, blit_3d) */
#define NV50_BIND_FB 0
#define NV50_BIND_VERTEX 1
#define NV50_BIND_VERTEX_TMP 2
@@ -58,7 +62,15 @@
#define NV50_BIND_SO 53
#define NV50_BIND_SCREEN 54
#define NV50_BIND_TLS 55
-#define NV50_BIND_COUNT 56
+#define NV50_BIND_3D_COUNT 56
+
+/* compute bufctx (during launch_grid) */
+#define NV50_BIND_CP_GLOBAL 0
+#define NV50_BIND_CP_SCREEN 1
+#define NV50_BIND_CP_QUERY 2
+#define NV50_BIND_CP_COUNT 3
+
+/* bufctx for other operations */
#define NV50_BIND_2D 0
#define NV50_BIND_M2MF 0
#define NV50_BIND_FENCE 1
@@ -101,8 +113,10 @@ struct nv50_context {
struct nouveau_bufctx *bufctx_3d;
struct nouveau_bufctx *bufctx;
+ struct nouveau_bufctx *bufctx_cp;
uint32_t dirty;
+ uint32_t dirty_cp; /* dirty flags for compute state */
bool cb_dirty;
struct nv50_graph_state state;
@@ -115,6 +129,7 @@ struct nv50_context {
struct nv50_program *vertprog;
struct nv50_program *gmtyprog;
struct nv50_program *fragprog;
+ struct nv50_program *compprog;
struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[3];
@@ -163,6 +178,8 @@ struct nv50_context {
uint32_t cond_condmode; /* the calculated condition */
struct nv50_blitctx *blit;
+
+ struct util_dynarray global_residents;
};
static inline struct nv50_context *
@@ -302,4 +319,9 @@ struct pipe_video_buffer *
nv98_video_buffer_create(struct pipe_context *pipe,
const struct pipe_video_buffer *template);
+/* nv50_compute.c */
+void
+nv50_launch_grid(struct pipe_context *, const uint *, const uint *,
+ uint32_t, const void *);
+
#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 89e7a338283..a4b8ddfda95 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -66,7 +66,6 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
case TGSI_SEMANTIC_VERTEXID:
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
- prog->vp.vertexid = 1;
continue;
default:
break;
@@ -259,6 +258,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
return nv50_vertprog_assign_slots(info);
case PIPE_SHADER_FRAGMENT:
return nv50_fragprog_assign_slots(info);
+ case PIPE_SHADER_COMPUTE:
+ return 0;
default:
return -1;
}
@@ -355,6 +356,9 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
prog->gp.has_layer = 0;
prog->gp.has_viewport = 0;
+ if (prog->type == PIPE_SHADER_COMPUTE)
+ info->prop.cp.inputOffset = 0x10;
+
info->driverPriv = prog;
#ifdef DEBUG
@@ -378,6 +382,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
prog->tls_space = info->bin.tlsSpace;
+ prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
+
if (prog->type == PIPE_SHADER_FRAGMENT) {
if (info->prop.fp.writesDepth) {
prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
@@ -401,6 +407,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
break;
}
prog->gp.vert_count = info->prop.gp.maxVertices;
+ } else
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ prog->cp.syms = info->bin.syms;
+ prog->cp.num_syms = info->bin.numSyms;
}
if (prog->pipe.stream_output.num_outputs)
@@ -423,11 +433,13 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
struct nouveau_heap *heap;
int ret;
uint32_t size = align(prog->code_size, 0x40);
+ uint8_t prog_type;
switch (prog->type) {
case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
+ case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break;
default:
assert(!"invalid program type");
return false;
@@ -450,7 +462,14 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
return false;
}
}
- prog->code_base = prog->mem->start;
+
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ /* CP code must be uploaded in FP code segment. */
+ prog_type = 1;
+ } else {
+ prog->code_base = prog->mem->start;
+ prog_type = prog->type;
+ }
ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
if (ret < 0) {
@@ -468,7 +487,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
false /* flatshade */);
nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
- (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
+ (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
NOUVEAU_BO_VRAM, prog->code_size, prog->code);
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
@@ -489,7 +508,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
FREE(p->code);
FREE(p->fixups);
-
+ FREE(p->interps);
FREE(p->so);
memset(p, 0, sizeof(*p));
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 7a33eb11d6d..1de5122a56e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -76,9 +76,9 @@ struct nv50_program {
ubyte psiz; /* output slot of point size */
ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */
ubyte edgeflag;
- ubyte vertexid;
ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */
ubyte clpd_nr;
+ bool need_vertex_id;
} vp;
struct {
@@ -98,6 +98,13 @@ struct nv50_program {
ubyte viewportid; /* hw value of viewport index output */
} gp;
+ struct {
+ uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
+ uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
+ void *syms;
+ unsigned num_syms;
+ } cp;
+
void *fixups; /* relocation records */
void *interps; /* interpolation records */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
index f31eaa0e314..cbef95d07f6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_push.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
@@ -24,6 +24,10 @@ struct push_context {
struct translate *translate;
bool primitive_restart;
+
+ bool need_vertex_id;
+ int32_t index_bias;
+
uint32_t prim;
uint32_t restart_index;
uint32_t instance_id;
@@ -74,6 +78,11 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
size = ctx->vertex_words * nr;
+ if (unlikely(ctx->need_vertex_id)) {
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -107,6 +116,11 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
size = ctx->vertex_words * nr;
+ if (unlikely(ctx->need_vertex_id)) {
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -140,6 +154,11 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
size = ctx->vertex_words * nr;
+ if (unlikely(ctx->need_vertex_id)) {
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -161,10 +180,18 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
static void
emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
{
+ uint32_t elts = 0;
+
while (count) {
unsigned push = MIN2(count, ctx->packet_vertex_limit);
unsigned size = ctx->vertex_words * push;
+ if (unlikely(ctx->need_vertex_id)) {
+ /* For non-indexed draws, gl_VertexID goes up after each vertex. */
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, elts++);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id,
@@ -216,7 +243,14 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
ctx.push = nv50->base.pushbuf;
ctx.translate = nv50->vertex->translate;
- ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit;
+
+ ctx.need_vertex_id = nv50->screen->base.class_3d >= NV84_3D_CLASS &&
+ nv50->vertprog->vp.need_vertex_id && (nv50->vertex->num_elements < 32);
+ ctx.index_bias = info->index_bias;
+
+ /* For indexed draws, gl_VertexID must be emitted for every vertex. */
+ ctx.packet_vertex_limit =
+ ctx.need_vertex_id ? 1 : nv50->vertex->packet_vertex_limit;
ctx.vertex_words = nv50->vertex->vertex_size;
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
@@ -307,4 +341,10 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
ctx.instance_id++;
ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
+
+ if (unlikely(ctx.need_vertex_id)) {
+ /* Reset gl_VertexID to prevent future indexed draws from being confused. */
+ BEGIN_NV04(ctx.push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx.push, nv50->state.index_bias);
+ }
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index dd9b85b7208..4cd3b615606 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,8 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_query.h"
#include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
static struct pipe_query *
nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
@@ -152,4 +154,79 @@ nv50_init_query_functions(struct nv50_context *nv50)
pipe->end_query = nv50_end_query;
pipe->get_query_result = nv50_get_query_result;
pipe->render_condition = nv50_render_condition;
+ nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS;
+}
+
+/**
+ * Enumerate the driver-specific queries of this screen.
+ *
+ * With a NULL info the total number of hardware driver queries is returned.
+ * Otherwise info is first filled with placeholder values and then handed to
+ * nv50_hw_get_driver_query_info(), whose return value is passed through.
+ */
+int
+nv50_screen_get_driver_query_info(struct pipe_screen *pscreen,
+                                  unsigned id,
+                                  struct pipe_driver_query_info *info)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+   const int total = nv50_hw_get_driver_query_info(screen, 0, NULL);
+
+   if (info == NULL)
+      return total;
+
+   /* Placeholders, left in place when the lookup below matches nothing. */
+   info->name = "this_is_not_the_query_you_are_looking_for";
+   info->query_type = 0xdeadd01d;
+   info->group_id = -1;
+   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+   info->max_value.u64 = 0;
+   info->flags = 0;
+
+   return nv50_hw_get_driver_query_info(screen, id, info);
+}
+
+/**
+ * Enumerate the driver query groups of this screen.
+ *
+ * Two groups (MP counters and performance metrics) are reported, and only
+ * when the screen has a compute object and a 3D class of NV84 or newer.
+ * With a NULL info the number of groups is returned; otherwise info is
+ * filled for group id and 1 is returned, or a placeholder is written and 0
+ * is returned when the group does not exist.
+ */
+int
+nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
+                                        unsigned id,
+                                        struct pipe_driver_query_group_info *info)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+   const bool has_groups =
+      screen->compute && screen->base.class_3d >= NV84_3D_CLASS;
+
+   if (!info)
+      return has_groups ? 2 : 0;
+
+   if (has_groups) {
+      switch (id) {
+      case NV50_HW_SM_QUERY_GROUP:
+         info->name = "MP counters";
+
+         /* The number of hardware counters each query needs cannot be
+          * exposed, so only a single query may be active at a time; this
+          * avoids failing once the counter limit is hit. These groups of
+          * GPU counters are currently only used by AMD_performance_monitor.
+          */
+         info->max_active_queries = 1;
+         info->num_queries = NV50_HW_SM_QUERY_COUNT;
+         return 1;
+      case NV50_HW_METRIC_QUERY_GROUP:
+         info->name = "Performance metrics";
+         info->max_active_queries = 1;
+         info->num_queries = NV50_HW_METRIC_QUERY_COUNT;
+         return 1;
+      default:
+         break;
+      }
+   }
+
+   /* The caller asked about a query group that does not exist. */
+   info->name = "this_is_not_the_query_group_you_are_looking_for";
+   info->max_active_queries = 0;
+   info->num_queries = 0;
+   return 0;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h
index d990285c857..bd4c0a386f6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h
@@ -28,6 +28,12 @@ nv50_query(struct pipe_query *pipe)
return (struct nv50_query *)pipe;
}
+/*
+ * Driver query groups (the group ids matched by
+ * nv50_screen_get_driver_query_group_info()):
+ */
+#define NV50_HW_SM_QUERY_GROUP 0
+#define NV50_HW_METRIC_QUERY_GROUP 1
+
void nv50_init_query_functions(struct nv50_context *);
#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index 945ce7abe50..b6ebbbf1010 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -25,6 +25,8 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
#include "nv_object.xml.h"
#define NV50_HW_QUERY_STATE_READY 0
@@ -41,7 +43,7 @@
#define NV50_HW_QUERY_ALLOC_SPACE 256
-static bool
+bool
nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
int size)
{
@@ -122,6 +124,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_hw_query *hq = nv50_hw_query(q);
+ if (hq->funcs && hq->funcs->begin_query)
+ return hq->funcs->begin_query(nv50, hq);
+
/* For occlusion queries we have to change the storage, because a previous
* query might set the initial render condition to false even *after* we re-
* initialized it to true.
@@ -193,6 +198,11 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_hw_query *hq = nv50_hw_query(q);
+ if (hq->funcs && hq->funcs->end_query) {
+ hq->funcs->end_query(nv50, hq);
+ return;
+ }
+
hq->state = NV50_HW_QUERY_STATE_ENDED;
switch (q->type) {
@@ -261,6 +271,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
uint64_t *data64 = (uint64_t *)hq->data;
int i;
+ if (hq->funcs && hq->funcs->get_query_result)
+ return hq->funcs->get_query_result(nv50, hq, wait, result);
+
if (hq->state != NV50_HW_QUERY_STATE_READY)
nv50_hw_query_update(q);
@@ -331,6 +344,18 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
struct nv50_hw_query *hq;
struct nv50_query *q;
+ hq = nv50_hw_sm_create_query(nv50, type);
+ if (hq) {
+ hq->base.funcs = &hw_query_funcs;
+ return (struct nv50_query *)hq;
+ }
+
+ hq = nv50_hw_metric_create_query(nv50, type);
+ if (hq) {
+ hq->base.funcs = &hw_query_funcs;
+ return (struct nv50_query *)hq;
+ }
+
hq = CALLOC_STRUCT(nv50_hw_query);
if (!hq)
return NULL;
@@ -375,6 +400,26 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
return q;
}
+/**
+ * Dispatch a driver-query lookup to the SM counter or metric back-end.
+ *
+ * Query ids are laid out as [SM counters][metrics]: ids below the SM counter
+ * count go to the SM back-end, the remainder is rebased and passed to the
+ * metric back-end. With a NULL info the sum of both counts is returned.
+ */
+int
+nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+                              struct pipe_driver_query_info *info)
+{
+   const int sm_count = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);
+   const int metric_count =
+      nv50_hw_metric_get_driver_query_info(screen, 0, NULL);
+
+   if (info == NULL)
+      return sm_count + metric_count;
+
+   if (id >= sm_count)
+      return nv50_hw_metric_get_driver_query_info(screen, id - sm_count,
+                                                  info);
+
+   return nv50_hw_sm_get_driver_query_info(screen, id, info);
+}
+
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
struct nv50_query *q, unsigned result_offset)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
index 294c67de9a4..82ec6bd2d96 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
@@ -8,8 +8,19 @@
#define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
+struct nv50_hw_query;
+
+/*
+ * Optional per-query-kind hooks for a hardware query. The generic
+ * nv50_hw_begin_query/end_query/get_query_result paths forward to the
+ * matching hook when the query has a funcs table and that hook is set.
+ */
+struct nv50_hw_query_funcs {
+ /* Releases the query. */
+ void (*destroy_query)(struct nv50_context *, struct nv50_hw_query *);
+ /* Starts the query; its return value is returned by nv50_hw_begin_query. */
+ boolean (*begin_query)(struct nv50_context *, struct nv50_hw_query *);
+ /* Stops the query. */
+ void (*end_query)(struct nv50_context *, struct nv50_hw_query *);
+ /* Fetches the result; the boolean argument is the caller's wait flag. */
+ boolean (*get_query_result)(struct nv50_context *, struct nv50_hw_query *,
+ boolean, union pipe_query_result *);
+};
+
struct nv50_hw_query {
struct nv50_query base;
+ const struct nv50_hw_query_funcs *funcs;
uint32_t *data;
uint32_t sequence;
struct nouveau_bo *bo;
@@ -31,6 +42,11 @@ nv50_hw_query(struct nv50_query *q)
struct nv50_query *
nv50_hw_create_query(struct nv50_context *, unsigned, unsigned);
+int
+nv50_hw_get_driver_query_info(struct nv50_screen *, unsigned,
+ struct pipe_driver_query_info *);
+bool
+nv50_hw_query_allocate(struct nv50_context *, struct nv50_query *, int);
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t,
struct nv50_query *, unsigned);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
new file mode 100644
index 00000000000..d1bccb94193
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
+
+/* === PERFORMANCE MONITORING METRICS for NV84+ === */
+/* Names reported for the metric queries, indexed by metric id (see
+ * nv50_hw_metric_get_driver_query_info()). */
+static const char *nv50_hw_metric_names[] =
+{
+ "metric-branch_efficiency",
+};
+
+/* Describes which SM counter queries a metric is computed from. */
+struct nv50_hw_metric_query_cfg {
+ uint32_t queries[4]; /* query types passed to nv50_hw_sm_create_query() */
+ uint32_t num_queries; /* number of valid entries in queries[] */
+};
+
+/* _SM(n): the SM counter query type NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_<n>). */
+#define _SM(n) NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_ ##n)
+/* _M(n, c): designated initializer storing c at index NV50_HW_METRIC_QUERY_<n>. */
+#define _M(n, c) [NV50_HW_METRIC_QUERY_##n] = c
+
+/* ==== Compute capability 1.1 (G84+) ==== */
+/* Branch efficiency is derived from the BRANCH and DIVERGENT_BRANCH counters. */
+static const struct nv50_hw_metric_query_cfg
+sm11_branch_efficiency =
+{
+ .queries[0] = _SM(BRANCH),
+ .queries[1] = _SM(DIVERGENT_BRANCH),
+ .num_queries = 2,
+};
+
+/* Metric configurations, indexed by metric id (NV50_HW_METRIC_QUERY_*). */
+static const struct nv50_hw_metric_query_cfg *sm11_hw_metric_queries[] =
+{
+ _M(BRANCH_EFFICIENCY, &sm11_branch_efficiency),
+};
+
+#undef _SM
+#undef _M
+
+/* Look up the configuration of a metric query from its type; the table is
+ * indexed by the type's offset from NV50_HW_METRIC_QUERY(0). The context
+ * argument is not used. */
+static const struct nv50_hw_metric_query_cfg *
+nv50_hw_metric_query_get_cfg(struct nv50_context *nv50,
+                             struct nv50_hw_query *hq)
+{
+   return sm11_hw_metric_queries[hq->base.type - NV50_HW_METRIC_QUERY(0)];
+}
+
+/* Destroy every SM counter query owned by the metric query, then free the
+ * metric query itself. */
+static void
+nv50_hw_metric_destroy_query(struct nv50_context *nv50,
+                             struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   unsigned n = 0;
+
+   while (n < hmq->num_queries) {
+      struct nv50_hw_query *sub = hmq->queries[n++];
+
+      sub->funcs->destroy_query(nv50, sub);
+   }
+   FREE(hmq);
+}
+
+/* Begin all SM counter queries backing the metric, in order. Stops at the
+ * first sub-query that fails to begin and returns its result; returns false
+ * when the metric has no sub-queries. */
+static boolean
+nv50_hw_metric_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   boolean ok = false;
+   unsigned n;
+
+   for (n = 0; n < hmq->num_queries; n++) {
+      struct nv50_hw_query *sub = hmq->queries[n];
+
+      ok = sub->funcs->begin_query(nv50, sub);
+      if (!ok)
+         break;
+   }
+   return ok;
+}
+
+/* End all SM counter queries backing the metric, in order. */
+static void
+nv50_hw_metric_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   unsigned n = 0;
+
+   while (n < hmq->num_queries) {
+      struct nv50_hw_query *sub = hmq->queries[n++];
+
+      sub->funcs->end_query(nv50, sub);
+   }
+}
+
+/**
+ * Combine raw SM counter values into the value of a metric.
+ *
+ * \param hq     metric query; its type selects the formula
+ * \param res64  counter values in the order of the metric's configuration.
+ *               The bound matches the 4-element buffer the caller passes
+ *               (it was previously declared as 8) and the values are only
+ *               read, hence const.
+ * \return the metric value, or 0 when the divisor is zero or the metric
+ *         type is unknown
+ */
+static uint64_t
+sm11_hw_metric_calc_result(struct nv50_hw_query *hq, const uint64_t res64[4])
+{
+   switch (hq->base.type - NV50_HW_METRIC_QUERY(0)) {
+   case NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
+      /* (branch / (branch + divergent_branch)) * 100 */
+      if (res64[0] + res64[1])
+         return (res64[0] / (double)(res64[0] + res64[1])) * 100;
+      break;
+   default:
+      debug_printf("invalid metric type: %d\n",
+                   hq->base.type - NV50_HW_METRIC_QUERY(0));
+      break;
+   }
+   return 0;
+}
+
+/* Collect the results of all SM counter queries backing the metric and
+ * store the computed metric value in result. Returns early, without
+ * touching result, as soon as one sub-query cannot deliver its result. */
+static boolean
+nv50_hw_metric_get_query_result(struct nv50_context *nv50,
+                                struct nv50_hw_query *hq, boolean wait,
+                                union pipe_query_result *result)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   union pipe_query_result results[4] = {};
+   uint64_t res64[4] = {};
+   boolean ok = false;
+   unsigned n;
+
+   for (n = 0; n < hmq->num_queries; n++) {
+      struct nv50_hw_query *sub = hmq->queries[n];
+
+      ok = sub->funcs->get_query_result(nv50, sub, wait, &results[n]);
+      if (!ok)
+         return ok;
+      /* each sub-query result is read back as a 64-bit value */
+      res64[n] = *(uint64_t *)&results[n];
+   }
+
+   *(uint64_t *)result = sm11_hw_metric_calc_result(hq, res64);
+   return ok;
+}
+
+/* Hook table installed on every metric query by
+ * nv50_hw_metric_create_query(). */
+static const struct nv50_hw_query_funcs hw_metric_query_funcs = {
+ .destroy_query = nv50_hw_metric_destroy_query,
+ .begin_query = nv50_hw_metric_begin_query,
+ .end_query = nv50_hw_metric_end_query,
+ .get_query_result = nv50_hw_metric_get_query_result,
+};
+
+/**
+ * Create a metric query together with the SM counter queries it depends on.
+ *
+ * Returns NULL when type is outside the metric query range, when allocation
+ * fails, or when one of the SM counter queries cannot be created (the
+ * sub-queries created so far are destroyed in that case).
+ */
+struct nv50_hw_query *
+nv50_hw_metric_create_query(struct nv50_context *nv50, unsigned type)
+{
+   const struct nv50_hw_metric_query_cfg *cfg;
+   struct nv50_hw_metric_query *hmq;
+   struct nv50_hw_query *hq;
+   unsigned n;
+
+   if (!(type >= NV50_HW_METRIC_QUERY(0) && type <= NV50_HW_METRIC_QUERY_LAST))
+      return NULL;
+
+   hmq = CALLOC_STRUCT(nv50_hw_metric_query);
+   if (hmq == NULL)
+      return NULL;
+
+   hq = &hmq->base;
+   hq->base.type = type;
+   hq->funcs = &hw_metric_query_funcs;
+
+   cfg = nv50_hw_metric_query_get_cfg(nv50, hq);
+
+   for (n = 0; n < cfg->num_queries; n++) {
+      struct nv50_hw_query *sub =
+         nv50_hw_sm_create_query(nv50, cfg->queries[n]);
+
+      hmq->queries[n] = sub;
+      if (sub == NULL) {
+         /* num_queries only counts the sub-queries created so far */
+         nv50_hw_metric_destroy_query(nv50, hq);
+         return NULL;
+      }
+      hmq->num_queries++;
+   }
+
+   return hq;
+}
+
+/**
+ * Report the metric driver queries.
+ *
+ * Metrics are only available when the screen has a compute object and a 3D
+ * class of NV84 or newer. With a NULL info the number of metrics is
+ * returned; otherwise name, query type and group of metric id are filled in
+ * and 1 is returned, or 0 when id is out of range.
+ */
+int
+nv50_hw_metric_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+                                     struct pipe_driver_query_info *info)
+{
+   int count = 0;
+
+   if (screen->compute && screen->base.class_3d >= NV84_3D_CLASS)
+      count = NV50_HW_METRIC_QUERY_COUNT;
+
+   if (!info)
+      return count;
+
+   if (id >= count)
+      return 0;
+
+   /* A non-zero count already implies compute on NV84+. */
+   info->name = nv50_hw_metric_names[id];
+   info->query_type = NV50_HW_METRIC_QUERY(id);
+   info->group_id = NV50_HW_METRIC_QUERY_GROUP;
+   return 1;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h
new file mode 100644
index 00000000000..f8cfc04084f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h
@@ -0,0 +1,34 @@
+#ifndef __NV50_QUERY_HW_METRIC_H__
+#define __NV50_QUERY_HW_METRIC_H__
+
+#include "nv50_query_hw.h"
+
+/* A metric query: a hardware query plus the queries its value is computed
+ * from. */
+struct nv50_hw_metric_query {
+ struct nv50_hw_query base; /* first member, so a pointer to it can be cast back */
+ struct nv50_hw_query *queries[4]; /* sub-queries created for this metric */
+ unsigned num_queries; /* number of valid entries in queries[] */
+};
+
+/* Downcast a hardware query to the metric query that embeds it as its first
+ * member. */
+static inline struct nv50_hw_metric_query *
+nv50_hw_metric_query(struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = (struct nv50_hw_metric_query *)hq;
+
+   return hmq;
+}
+
+/*
+ * Driver metric queries:
+ *
+ * NV50_HW_METRIC_QUERY(i) maps metric id i to its query type, and
+ * NV50_HW_METRIC_QUERY_LAST is the query type of the highest metric id.
+ */
+#define NV50_HW_METRIC_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
+#define NV50_HW_METRIC_QUERY_LAST NV50_HW_METRIC_QUERY(NV50_HW_METRIC_QUERY_COUNT - 1)
+/* Metric ids; NV50_HW_METRIC_QUERY_COUNT is the number of metrics. */
+enum nv50_hw_metric_queries
+{
+ NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY = 0,
+ NV50_HW_METRIC_QUERY_COUNT
+};
+
+struct nv50_hw_query *
+nv50_hw_metric_create_query(struct nv50_context *, unsigned);
+int
+nv50_hw_metric_get_driver_query_info(struct nv50_screen *, unsigned,
+ struct pipe_driver_query_info *);
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
new file mode 100644
index 00000000000..8453ce76095
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw_sm.h"
+
+#include "nv_object.xml.h"
+#include "nv50/nv50_compute.xml.h"
+
+/* === PERFORMANCE MONITORING COUNTERS for NV84+ === */
+
+/* NOTE: intentionally using the same names as NV.
+ * Indexed by query id in nv50_hw_sm_get_driver_query_info(), so the order
+ * must match enum nv50_hw_sm_queries. */
+static const char *nv50_hw_sm_query_names[] =
+{
+ "branch",
+ "divergent_branch",
+ "instructions",
+ "prof_trigger_00",
+ "prof_trigger_01",
+ "prof_trigger_02",
+ "prof_trigger_03",
+ "prof_trigger_04",
+ "prof_trigger_05",
+ "prof_trigger_06",
+ "prof_trigger_07",
+ "sm_cta_launched",
+ "warp_serialize",
+};
+
+/* Pre-assembled compute program that nv50_hw_sm_end_query() installs as
+ * screen->pm.prog and launches to copy the MP counters into the query buffer.
+ * Per the disassembly below it writes $pm0-$pm3 plus one extra word into a
+ * 0x14-byte record selected from $physid. The two words loaded from s[0x10]
+ * and s[0x14] are presumably the launch inputs (buffer offset and sequence
+ * number) -- TODO confirm against the compute launch code. */
+static const uint64_t nv50_read_hw_sm_counters_code[] =
+{
+ /* and b32 $r0 $r0 0x0000ffff
+ * add b32 $c0 $r0 $r0 $r0
+ * (lg $c0) ret
+ * mov $r0 $pm0
+ * mov $r1 $pm1
+ * mov $r2 $pm2
+ * mov $r3 $pm3
+ * mov $r4 $physid
+ * ld $r5 b32 s[0x10]
+ * ld $r6 b32 s[0x14]
+ * and b32 $r4 $r4 0x000f0000
+ * shr u32 $r4 $r4 0x10
+ * mul $r4 u24 $r4 0x14
+ * add b32 $r5 $r5 $r4
+ * st b32 g15[$r5] $r0
+ * add b32 $r5 $r5 0x04
+ * st b32 g15[$r5] $r1
+ * add b32 $r5 $r5 0x04
+ * st b32 g15[$r5] $r2
+ * add b32 $r5 $r5 0x04
+ * st b32 g15[$r5] $r3
+ * add b32 $r5 $r5 0x04
+ * exit st b32 g15[$r5] $r6 */
+ 0x00000fffd03f0001ULL,
+ 0x040007c020000001ULL,
+ 0x0000028030000003ULL,
+ 0x6001078000000001ULL,
+ 0x6001478000000005ULL,
+ 0x6001878000000009ULL,
+ 0x6001c7800000000dULL,
+ 0x6000078000000011ULL,
+ 0x4400c78010000815ULL,
+ 0x4400c78010000a19ULL,
+ 0x0000f003d0000811ULL,
+ 0xe410078030100811ULL,
+ 0x0000000340540811ULL,
+ 0x0401078020000a15ULL,
+ 0xa0c00780d00f0a01ULL,
+ 0x0000000320048a15ULL,
+ 0xa0c00780d00f0a05ULL,
+ 0x0000000320048a15ULL,
+ 0xa0c00780d00f0a09ULL,
+ 0x0000000320048a15ULL,
+ 0xa0c00780d00f0a0dULL,
+ 0x0000000320048a15ULL,
+ 0xa0c00781d00f0a19ULL,
+};
+
+/* Programming of one hardware counter; the fields are OR-ed into the
+ * MP_PM_CONTROL word by the begin/end query code. */
+struct nv50_hw_sm_counter_cfg
+{
+ uint32_t mode : 4; /* LOGOP, LOGOP_PULSE */
+ uint32_t unit : 8; /* UNK[0-5] */
+ uint32_t sig : 8; /* signal selection */
+};
+
+/* Counter set needed by one query type. */
+struct nv50_hw_sm_query_cfg
+{
+ struct nv50_hw_sm_counter_cfg ctr[4]; /* only the first num_counters entries are used */
+ uint8_t num_counters; /* how many hardware counter slots the query occupies (at most 4) */
+};
+
+/* _Q(name, mode, unit, signal): designated initializer for the table entry of
+ * NV50_HW_SM_QUERY_<name>, describing a query that uses a single counter. */
+#define _Q(n, m, u, s) [NV50_HW_SM_QUERY_##n] = { { { NV50_COMPUTE_MP_PM_CONTROL_MODE_##m, NV50_COMPUTE_MP_PM_CONTROL_UNIT_##u, s, }, {}, {}, {} }, 1 }
+
+/* ==== Compute capability 1.1 (G84+) ==== */
+/* Looked up by (query type - NV50_HW_SM_QUERY(0)) in nv50_hw_sm_query_get_cfg(). */
+static const struct nv50_hw_sm_query_cfg sm11_hw_sm_queries[] =
+{
+ _Q(BRANCH, LOGOP, UNK4, 0x02),
+ _Q(DIVERGENT_BRANCH, LOGOP, UNK4, 0x09),
+ _Q(INSTRUCTIONS, LOGOP, UNK4, 0x04),
+ _Q(PROF_TRIGGER_0, LOGOP, UNK1, 0x26),
+ _Q(PROF_TRIGGER_1, LOGOP, UNK1, 0x27),
+ _Q(PROF_TRIGGER_2, LOGOP, UNK1, 0x28),
+ _Q(PROF_TRIGGER_3, LOGOP, UNK1, 0x29),
+ _Q(PROF_TRIGGER_4, LOGOP, UNK1, 0x2a),
+ _Q(PROF_TRIGGER_5, LOGOP, UNK1, 0x2b),
+ _Q(PROF_TRIGGER_6, LOGOP, UNK1, 0x2c),
+ _Q(PROF_TRIGGER_7, LOGOP, UNK1, 0x2d),
+ _Q(SM_CTA_LAUNCHED, LOGOP, UNK1, 0x33),
+ _Q(WARP_SERIALIZE, LOGOP, UNK0, 0x0b),
+};
+
+/* Return the 16-bit function mask programmed into MP_PM_CONTROL for counter
+ * slot 0-3, or 0 for any other slot. Mask n has bit k set exactly when bit n
+ * of k is set (presumably a 4-input truth table that passes input n through;
+ * confirm against the hardware documentation). */
+static inline uint16_t nv50_hw_sm_get_func(uint8_t slot)
+{
+   static const uint16_t slot_func[4] = { 0xaaaa, 0xcccc, 0xf0f0, 0xff00 };
+
+   return slot < 4 ? slot_func[slot] : 0;
+}
+
+/* Map a query to its counter configuration: the query type, rebased on
+ * NV50_HW_SM_QUERY(0), indexes sm11_hw_sm_queries[]. No range check is done
+ * here; nv50_hw_sm_create_query() validates the type. nv50 is unused. */
+static const struct nv50_hw_sm_query_cfg *
+nv50_hw_sm_query_get_cfg(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   return sm11_hw_sm_queries + (hq->base.type - NV50_HW_SM_QUERY(0));
+}
+
+/* Destroy an SM counter query by handing it to the destroy hook of the
+ * generic query it embeds.
+ * NOTE(review): verify that this base hook does not dispatch back to
+ * hq->funcs->destroy_query (i.e. this function), which would recurse. */
+static void
+nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_query *base = &hq->base;
+
+   base->funcs->destroy_query(nv50, base);
+}
+
+/**
+ * Start an MP performance counter query.
+ *
+ * Reserves one free hardware counter slot per counter the query type needs,
+ * clears the per-MP sequence words of the result buffer, then programs and
+ * zeroes the reserved counters.
+ *
+ * \return false (after logging) when fewer free slots remain than the query
+ *         needs, true otherwise
+ */
+static boolean
+nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_screen *screen = nv50->screen;
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+   const struct nv50_hw_sm_query_cfg *cfg;
+   uint16_t func;
+   int i, c;
+
+   cfg = nv50_hw_sm_query_get_cfg(nv50, hq);
+
+   /* check if we have enough free counter slots */
+   if (screen->pm.num_hw_sm_active + cfg->num_counters > 4) {
+      NOUVEAU_ERR("Not enough free MP counter slots !\n");
+      return false;
+   }
+
+   assert(cfg->num_counters <= 4);
+   /* up to 4 counters, 2 methods each, 2 dwords (header + data) per method */
+   PUSH_SPACE(push, 4 * 4);
+
+   /* set sequence field to 0 (used to check if result is available)
+    *
+    * Each MP owns a record of 0x14 bytes, i.e. 5 dwords: C0-C3 followed by
+    * the sequence at byte offset 0x10. hq->data is indexed in dwords, so the
+    * sequence lives at b + 4, the same slot nv50_hw_sm_query_read_data()
+    * polls. Using the byte offset (16) as an index would miss the field and
+    * write beyond the per-MP records. */
+   for (i = 0; i < screen->MPsInTP; ++i) {
+      const unsigned b = (0x14 / 4) * i;
+      hq->data[b + 4] = 0;
+   }
+   hq->sequence++;
+
+   for (i = 0; i < cfg->num_counters; i++) {
+      screen->pm.num_hw_sm_active++;
+
+      /* find free counter slots; the check above guarantees one exists */
+      for (c = 0; c < 4; ++c) {
+         if (!screen->pm.mp_counter[c]) {
+            hsq->ctr[i] = c;
+            screen->pm.mp_counter[c] = hsq;
+            break;
+         }
+      }
+
+      /* select func to aggregate counters */
+      func = nv50_hw_sm_get_func(c);
+
+      /* configure and reset the counter(s) */
+      BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+      PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
+                        | cfg->ctr[i].unit | cfg->ctr[i].mode);
+      BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1);
+      PUSH_DATA (push, 0);
+   }
+   return true;
+}
+
+/**
+ * Stop an MP performance counter query and read its counters back.
+ *
+ * Sequence: lazily create the read-back compute program, stop every active
+ * counter, release the slots owned by this query, launch the read-back
+ * program so it stores the counter values and hq->sequence into the query
+ * buffer, and finally re-enable the counters that belong to other queries.
+ *
+ * If the read-back program cannot be allocated the launch is skipped (and an
+ * error logged); the counter slots are still released and the other queries
+ * re-armed, but this query's sequence is never written to the buffer.
+ */
+static void
+nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_screen *screen = nv50->screen;
+   struct pipe_context *pipe = &nv50->base.pipe;
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+   uint32_t mask;
+   uint32_t input[3];
+   const uint block[3] = { 32, 1, 1 };
+   const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 };
+   int c;
+
+   if (unlikely(!screen->pm.prog)) {
+      struct nv50_program *prog = CALLOC_STRUCT(nv50_program);
+      /* only publish the program when the allocation succeeded */
+      if (prog) {
+         prog->type = PIPE_SHADER_COMPUTE;
+         prog->translated = true;
+         prog->max_gpr = 7;
+         prog->parm_size = 8;
+         prog->code = (uint32_t *)nv50_read_hw_sm_counters_code;
+         prog->code_size = sizeof(nv50_read_hw_sm_counters_code);
+         screen->pm.prog = prog;
+      }
+   }
+
+   /* disable all counting */
+   PUSH_SPACE(push, 8);
+   for (c = 0; c < 4; c++) {
+      if (screen->pm.mp_counter[c]) {
+         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+         PUSH_DATA (push, 0);
+      }
+   }
+
+   /* release counters for this query */
+   for (c = 0; c < 4; c++) {
+      if (screen->pm.mp_counter[c] == hsq) {
+         screen->pm.num_hw_sm_active--;
+         screen->pm.mp_counter[c] = NULL;
+      }
+   }
+
+   if (screen->pm.prog) {
+      BCTX_REFN_bo(nv50->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
+                   hq->bo);
+
+      PUSH_SPACE(push, 2);
+      BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+      PUSH_DATA (push, 0);
+
+      /* parm_size is 8: only input[0] and input[1] are set */
+      pipe->bind_compute_state(pipe, screen->pm.prog);
+      input[0] = hq->bo->offset + hq->base_offset;
+      input[1] = hq->sequence;
+      pipe->launch_grid(pipe, block, grid, 0, input);
+
+      nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);
+   } else {
+      NOUVEAU_ERR("Failed to allocate the MP counter read-back program !\n");
+   }
+
+   /* re-activate other counters */
+   PUSH_SPACE(push, 8);
+   mask = 0;
+   for (c = 0; c < 4; c++) {
+      const struct nv50_hw_sm_query_cfg *cfg;
+      unsigned i;
+
+      hsq = screen->pm.mp_counter[c];
+      if (!hsq)
+         continue;
+
+      cfg = nv50_hw_sm_query_get_cfg(nv50, &hsq->base);
+      for (i = 0; i < cfg->num_counters; i++) {
+         uint16_t func;
+
+         /* a query owning several slots is visited once per slot; mask
+          * tracks which slots were already reprogrammed */
+         if (mask & (1 << hsq->ctr[i]))
+            break;
+
+         mask |= 1 << hsq->ctr[i];
+         func = nv50_hw_sm_get_func(hsq->ctr[i]);
+
+         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1);
+         PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
+                           | cfg->ctr[i].unit | cfg->ctr[i].mode);
+      }
+   }
+}
+
+/**
+ * Copy the counter values of the first mp_count MPs out of the query buffer.
+ *
+ * count[mp][i] receives the value of the hardware slot assigned to the
+ * query's i-th counter. Before each read the MP's sequence word is compared
+ * with hq->sequence; on a mismatch the function either gives up (!wait) or
+ * waits for the buffer object and then reads without re-checking.
+ *
+ * \return false if a result is not ready and wait is false, or if waiting on
+ *         the buffer fails; true otherwise
+ */
+static inline bool
+nv50_hw_sm_query_read_data(uint32_t count[32][4],
+                           struct nv50_context *nv50, bool wait,
+                           struct nv50_hw_query *hq,
+                           const struct nv50_hw_sm_query_cfg *cfg,
+                           unsigned mp_count)
+{
+   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+   unsigned mp, i;
+
+   for (mp = 0; mp < mp_count; ++mp) {
+      /* 0x14 bytes (5 dwords) per MP: C0..C3, then the sequence */
+      const unsigned rec = (0x14 / 4) * mp;
+
+      for (i = 0; i < cfg->num_counters; ++i) {
+         if (hq->data[rec + 4] != hq->sequence &&
+             (!wait ||
+              nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->base.client)))
+            return false;
+
+         count[mp][i] = hq->data[rec + hsq->ctr[i]];
+      }
+   }
+   return true;
+}
+
+/**
+ * Fetch the result of an MP counter query.
+ *
+ * Sums every counter of every MP that was read back (at most 32 MPs) and
+ * scales the sum by the number of TPs before storing it as a 64-bit value
+ * in *result.
+ *
+ * \return false when the counters could not be read (see
+ *         nv50_hw_sm_query_read_data()), true otherwise
+ */
+static boolean
+nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq,
+                            boolean wait, union pipe_query_result *result)
+{
+   uint32_t count[32][4];
+   const unsigned mp_count = MIN2(nv50->screen->MPsInTP, 32);
+   const struct nv50_hw_sm_query_cfg *cfg = nv50_hw_sm_query_get_cfg(nv50, hq);
+   uint64_t sum = 0;
+   unsigned mp, ctr;
+
+   if (!nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count))
+      return false;
+
+   for (mp = 0; mp < mp_count; ++mp)
+      for (ctr = 0; ctr < cfg->num_counters; ++ctr)
+         sum += count[mp][ctr];
+
+   /* We only count a single TP, and simply multiply by the total number of
+    * TPs to compute result over all TPs. This is inaccurate, but enough! */
+   *(uint64_t *)result = sum * nv50->screen->TPs;
+   return true;
+}
+
+/* Hook table installed on every query made by nv50_hw_sm_create_query(). */
+static const struct nv50_hw_query_funcs hw_sm_query_funcs = {
+ .destroy_query = nv50_hw_sm_destroy_query,
+ .begin_query = nv50_hw_sm_begin_query,
+ .end_query = nv50_hw_sm_end_query,
+ .get_query_result = nv50_hw_sm_get_query_result,
+};
+
+/**
+ * Create an MP performance counter query.
+ *
+ * \param nv50  context the query is created for
+ * \param type  pipe query type; must lie in
+ *              [NV50_HW_SM_QUERY(0), NV50_HW_SM_QUERY_LAST]
+ * \return the new query, or NULL when type is not an SM query type or an
+ *         allocation fails
+ */
+struct nv50_hw_query *
+nv50_hw_sm_create_query(struct nv50_context *nv50, unsigned type)
+{
+   struct nv50_hw_sm_query *hsq;
+   unsigned space;
+
+   if (type < NV50_HW_SM_QUERY(0) || type > NV50_HW_SM_QUERY_LAST)
+      return NULL;
+
+   hsq = CALLOC_STRUCT(nv50_hw_sm_query);
+   if (!hsq)
+      return NULL;
+
+   hsq->base.funcs = &hw_sm_query_funcs;
+   hsq->base.base.type = type;
+
+   /*
+    * for each MP:
+    * [00] = MP.C0
+    * [04] = MP.C1
+    * [08] = MP.C2
+    * [0c] = MP.C3
+    * [10] = MP.sequence
+    */
+   space = (4 + 1) * nv50->screen->MPsInTP * sizeof(uint32_t);
+
+   if (!nv50_hw_query_allocate(nv50, &hsq->base.base, space)) {
+      /* base is the first member of hsq, so this frees the whole object */
+      FREE(&hsq->base);
+      return NULL;
+   }
+
+   return &hsq->base;
+}
+
+/**
+ * Enumerate the driver-specific MP performance counter queries.
+ *
+ * Counters are only exposed when the screen has a compute object and the 3D
+ * class is NV84 or newer; otherwise the reported count is zero.
+ *
+ * \param screen  screen being queried
+ * \param id      index of the counter query, in [0, count)
+ * \param info    descriptor to fill in, or NULL to only ask for the count
+ * \return the number of available queries when info is NULL; otherwise 1 if
+ *         info was filled in for id, 0 if id is out of range
+ */
+int
+nv50_hw_sm_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+                                 struct pipe_driver_query_info *info)
+{
+   const int count =
+      (screen->compute && screen->base.class_3d >= NV84_3D_CLASS) ?
+      NV50_HW_SM_QUERY_COUNT : 0;
+
+   if (!info)
+      return count;
+
+   /* count is zero on unsupported hardware, so every id is rejected there */
+   if (id >= (unsigned)count)
+      return 0;
+
+   info->name = nv50_hw_sm_query_names[id];
+   info->query_type = NV50_HW_SM_QUERY(id);
+   info->group_id = NV50_HW_SM_QUERY_GROUP;
+   return 1;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h
new file mode 100644
index 00000000000..c1a1cd175e3
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h
@@ -0,0 +1,45 @@
+#ifndef __NV50_QUERY_HW_SM_H__
+#define __NV50_QUERY_HW_SM_H__
+
+#include "nv50_query_hw.h"
+
+struct nv50_hw_sm_query {
+ struct nv50_hw_query base; /* first member: nv50_hw_sm_query() below casts a base pointer back to the full struct */
+ uint8_t ctr[4]; /* ctr[i] = hardware counter slot (0-3) assigned to the query's i-th counter by begin_query */
+};
+
+static inline struct nv50_hw_sm_query *
+nv50_hw_sm_query(struct nv50_hw_query *hq)
+{
+ return (struct nv50_hw_sm_query *)hq; /* plain downcast, no type check: caller must pass a query embedded in a nv50_hw_sm_query */
+}
+
+/*
+ * Performance counter queries:
+ * NV50_HW_SM_QUERY(i) maps counter index i to a pipe query type starting at PIPE_QUERY_DRIVER_SPECIFIC; NV50_HW_SM_QUERY_LAST is the type of the last enum entry.
+ */
+#define NV50_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
+#define NV50_HW_SM_QUERY_LAST NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_COUNT - 1)
+enum nv50_hw_sm_queries
+{
+ NV50_HW_SM_QUERY_BRANCH = 0,
+ NV50_HW_SM_QUERY_DIVERGENT_BRANCH,
+ NV50_HW_SM_QUERY_INSTRUCTIONS,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_0,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_1,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_2,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_3,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_4,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_5,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_6,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_7,
+ NV50_HW_SM_QUERY_SM_CTA_LAUNCHED,
+ NV50_HW_SM_QUERY_WARP_SERIALIZE,
+ NV50_HW_SM_QUERY_COUNT, /* number of counter queries; keep last */
+};
+
+struct nv50_hw_query *
+nv50_hw_sm_create_query(struct nv50_context *, unsigned);
+int
+nv50_hw_sm_get_driver_query_info(struct nv50_screen *, unsigned,
+ struct pipe_driver_query_info *);
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index f47e998ab1e..1e4b75f18e0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -41,8 +41,6 @@
#define THREADS_IN_WARP 32
-#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
-
static boolean
nv50_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
@@ -183,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_COMPUTE:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -212,7 +211,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
- case PIPE_CAP_COMPUTE:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
@@ -251,6 +249,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_COMPUTE:
break;
default:
return 0;
@@ -336,6 +335,52 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
return 0.0f;
}
+/* Report a compute capability. For a known param the value is copied into
+ * data when data is non-NULL and its size in bytes is returned either way;
+ * unknown params return 0. */
+static int
+nv50_screen_get_compute_param(struct pipe_screen *pscreen,
+ enum pipe_compute_cap param, void *data)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+
+/* RET(x): x must be an array (sizeof is taken of it); copies it out if
+ * requested and returns its size. */
+#define RET(x) do { \
+ if (data) \
+ memcpy(data, x, sizeof(x)); \
+ return sizeof(x); \
+} while (0)
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ RET((uint64_t []) { 2 });
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ RET(((uint64_t []) { 65535, 65535 }));
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ RET(((uint64_t []) { 512, 512, 64 }));
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ RET((uint64_t []) { 512 });
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
+ RET((uint64_t []) { 1ULL << 32 });
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+ RET((uint64_t []) { 16 << 10 });
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+ RET((uint64_t []) { 16 << 10 });
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+ RET((uint64_t []) { 4096 });
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ RET((uint32_t []) { 32 });
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+ RET((uint64_t []) { 1ULL << 40 });
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ RET((uint32_t []) { 0 });
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ RET((uint32_t []) { screen->mp_count }); /* TPs * MPsInTP, computed in nv50_screen_create() */
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
+ default:
+ return 0;
+ }
+
+#undef RET
+}
+
static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
@@ -377,6 +422,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
nouveau_object_del(&screen->tesla);
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->compute);
nouveau_object_del(&screen->sync);
nouveau_screen_fini(&screen->base);
@@ -640,7 +686,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, 0);
if (screen->base.class_3d >= NV84_3D_CLASS) {
- BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+ BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
PUSH_DATA (push, 0);
}
@@ -742,6 +788,9 @@ nv50_screen_create(struct nouveau_device *dev)
pscreen->get_param = nv50_screen_get_param;
pscreen->get_shader_param = nv50_screen_get_shader_param;
pscreen->get_paramf = nv50_screen_get_paramf;
+ pscreen->get_compute_param = nv50_screen_get_compute_param;
+ pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
+ pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info;
nv50_screen_init_resource_functions(pscreen);
@@ -851,6 +900,8 @@ nv50_screen_create(struct nouveau_device *dev)
screen->TPs = util_bitcount(value & 0xffff);
screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
+ screen->mp_count = screen->TPs * screen->MPsInTP;
+
stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
STACK_WARPS_ALLOC * 64 * 8;
@@ -902,6 +953,12 @@ nv50_screen_create(struct nouveau_device *dev)
nv50_screen_init_hwctx(screen);
+ ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
+ if (ret) {
+ NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
+ goto fail;
+ }
+
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index ce51f0fc254..2a4983d1020 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -23,6 +23,10 @@ struct nv50_context;
#define NV50_MAX_VIEWPORTS 16
+#define NV50_MAX_GLOBALS 16
+
+#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
+
struct nv50_blitter;
struct nv50_graph_state {
@@ -66,6 +70,7 @@ struct nv50_screen {
unsigned MPsInTP;
unsigned max_tls_space;
unsigned cur_tls_space;
+ unsigned mp_count;
struct nouveau_heap *vp_code_heap;
struct nouveau_heap *gp_code_heap;
@@ -90,9 +95,16 @@ struct nv50_screen {
struct nouveau_bo *bo;
} fence;
+ struct {
+ struct nv50_program *prog; /* compute state object to read MP counters */
+ struct nv50_hw_sm_query *mp_counter[4]; /* counter to query allocation */
+ uint8_t num_hw_sm_active;
+ } pm;
+
struct nouveau_object *sync;
struct nouveau_object *tesla;
+ struct nouveau_object *compute;
struct nouveau_object *eng2d;
struct nouveau_object *m2mf;
};
@@ -103,12 +115,19 @@ nv50_screen(struct pipe_screen *screen)
return (struct nv50_screen *)screen;
}
+int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+ struct pipe_driver_query_info *);
+int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned,
+ struct pipe_driver_query_group_info *);
+
bool nv50_blitter_create(struct nv50_screen *);
void nv50_blitter_destroy(struct nv50_screen *);
int nv50_screen_tic_alloc(struct nv50_screen *, void *);
int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
+int nv50_screen_compute_setup(struct nv50_screen *, struct nouveau_pushbuf *);
+
static inline void
nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
{
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index d27f12ca94b..b4ea08d4d13 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -792,6 +792,35 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
nv50->dirty |= NV50_NEW_GMTYPROG;
}
+/* Create a compute program state object from a pipe_compute_state: records
+ * the requested local (shared), private and input memory sizes and keeps a
+ * private copy of the TGSI tokens. Returns NULL if the program object cannot
+ * be allocated. */
+static void *
+nv50_cp_state_create(struct pipe_context *pipe,
+                     const struct pipe_compute_state *cso)
+{
+   struct nv50_program *cp = CALLOC_STRUCT(nv50_program);
+
+   if (!cp)
+      return NULL;
+
+   cp->type = PIPE_SHADER_COMPUTE;
+   cp->parm_size = cso->req_input_mem;
+   cp->cp.smem_size = cso->req_local_mem;
+   cp->cp.lmem_size = cso->req_private_mem;
+   cp->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog);
+
+   return cp;
+}
+
+/* Make hwcso the current compute program and flag the compute program state
+ * as dirty so it is revalidated. */
+static void
+nv50_cp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nv50_context *ctx = nv50_context(pipe);
+
+   ctx->dirty_cp |= NV50_NEW_CP_PROGRAM;
+   ctx->compprog = hwcso;
+}
+
static void
nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
struct pipe_constant_buffer *cb)
@@ -1134,6 +1163,70 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
nv50->dirty |= NV50_NEW_STRMOUT;
}
+static void
+nv50_set_compute_resources(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_surface **resources)
+{
+ /* TODO: bind surfaces -- currently a no-op, all arguments are ignored */
+}
+
+/* Store the 32-bit GPU address of res in *phandle. A NULL resource, or one
+ * whose last byte lies at or above 4 GiB (logged as an error), yields a
+ * handle of 0. */
+static inline void
+nv50_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
+{
+   struct nv04_resource *buf = nv04_resource(res);
+   uint64_t last;
+
+   if (!buf) {
+      *phandle = 0;
+      return;
+   }
+
+   /* address of the last byte covered by the buffer */
+   last = (buf->address + buf->base.width0) - 1;
+   if (last >= (1ULL << 32)) {
+      NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: "
+                  "resource not contained within 32-bit address space !\n");
+      *phandle = 0;
+      return;
+   }
+
+   *phandle = (uint32_t)buf->address;
+}
+
+/**
+ * Bind or unbind a range of global (TGSI_RESOURCE_GLOBAL) buffers.
+ *
+ * \param pipe       context
+ * \param start      first binding slot
+ * \param nr         number of slots
+ * \param resources  nr resources to bind, or NULL to unbind the range
+ * \param handles    when binding, handles[i] receives the 32-bit GPU address
+ *                   of resources[i] (0 if it cannot be addressed); not read
+ *                   when resources is NULL
+ *
+ * The context keeps a reference on every bound resource in global_residents,
+ * which is grown (zero-filled) on demand.
+ */
+static void
+nv50_set_global_bindings(struct pipe_context *pipe,
+                         unsigned start, unsigned nr,
+                         struct pipe_resource **resources,
+                         uint32_t **handles)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct pipe_resource **ptr;
+   unsigned i;
+   const unsigned end = start + nr;
+
+   if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
+      const unsigned old_size = nv50->global_residents.size;
+      const unsigned req_size = end * sizeof(struct pipe_resource *);
+      util_dynarray_resize(&nv50->global_residents, req_size);
+      /* new slots must read as "nothing bound" */
+      memset((uint8_t *)nv50->global_residents.data + old_size, 0,
+             req_size - old_size);
+   }
+
+   if (resources) {
+      ptr = util_dynarray_element(
+         &nv50->global_residents, struct pipe_resource *, start);
+      for (i = 0; i < nr; ++i) {
+         pipe_resource_reference(&ptr[i], resources[i]);
+         nv50_set_global_handle(handles[i], resources[i]);
+      }
+   } else {
+      ptr = util_dynarray_element(
+         &nv50->global_residents, struct pipe_resource *, start);
+      for (i = 0; i < nr; ++i)
+         pipe_resource_reference(&ptr[i], NULL);
+   }
+
+   nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL);
+
+   /* OR the flag in: a plain assignment would drop other pending compute
+    * dirty bits, e.g. NV50_NEW_CP_PROGRAM set by nv50_cp_state_bind() */
+   nv50->dirty_cp |= NV50_NEW_CP_GLOBALS;
+}
+
void
nv50_init_state_functions(struct nv50_context *nv50)
{
@@ -1162,12 +1255,15 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->create_vs_state = nv50_vp_state_create;
pipe->create_fs_state = nv50_fp_state_create;
pipe->create_gs_state = nv50_gp_state_create;
+ pipe->create_compute_state = nv50_cp_state_create;
pipe->bind_vs_state = nv50_vp_state_bind;
pipe->bind_fs_state = nv50_fp_state_bind;
pipe->bind_gs_state = nv50_gp_state_bind;
+ pipe->bind_compute_state = nv50_cp_state_bind;
pipe->delete_vs_state = nv50_sp_state_delete;
pipe->delete_fs_state = nv50_sp_state_delete;
pipe->delete_gs_state = nv50_sp_state_delete;
+ pipe->delete_compute_state = nv50_sp_state_delete;
pipe->set_blend_color = nv50_set_blend_color;
pipe->set_stencil_ref = nv50_set_stencil_ref;
@@ -1191,6 +1287,9 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->stream_output_target_destroy = nv50_so_target_destroy;
pipe->set_stream_output_targets = nv50_set_stream_output_targets;
+ pipe->set_global_binding = nv50_set_global_bindings;
+ pipe->set_compute_resources = nv50_set_compute_resources;
+
nv50->sample_mask = ~0;
nv50->min_samples = 1;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index b6181edf24f..02a759c23ad 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -503,8 +503,7 @@ static struct state_validate {
{ nv50_validate_samplers, NV50_NEW_SAMPLERS },
{ nv50_stream_output_validate, NV50_NEW_STRMOUT |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
- NV50_NEW_VERTPROG },
+ { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
{ nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES },
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 916a7d44a31..8ba19d2cc90 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -339,12 +339,18 @@ nv50_clear_render_target(struct pipe_context *pipe,
PUSH_DATA (push, (width << 16) | dstx);
PUSH_DATA (push, (height << 16) | dsty);
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, 0x3c |
(z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}
@@ -415,12 +421,18 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
PUSH_DATA (push, (width << 16) | dstx);
PUSH_DATA (push, (height << 16) | dsty);
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, mode |
(z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}
@@ -673,6 +685,9 @@ nv50_clear_buffer(struct pipe_context *pipe,
PUSH_DATA (push, (width << 16));
PUSH_DATA (push, (height << 16));
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
PUSH_DATA (push, 0x3c);
@@ -690,6 +705,9 @@ nv50_clear_buffer(struct pipe_context *pipe,
PUSH_DATA (push, 0x3c);
}
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 9aa593f919e..85878d5fcc7 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -294,8 +294,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
uint64_t addrs[PIPE_MAX_ATTRIBS];
uint32_t limits[PIPE_MAX_ATTRIBS];
struct nouveau_pushbuf *push = nv50->base.pushbuf;
- struct nv50_vertex_stateobj dummy = {};
- struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
+ struct nv50_vertex_stateobj *vertex = nv50->vertex;
struct pipe_vertex_buffer *vb;
struct nv50_vertex_element *ve;
uint32_t mask;
@@ -303,14 +302,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
unsigned i;
const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
- /* A vertexid is not generated for inline data uploads. Have to use a
- * VBO. This check must come after the vertprog has been validated,
- * otherwise vertexid may be unset.
- */
- assert(nv50->vertprog->translated);
- if (nv50->vertprog->vp.vertexid)
- nv50->vbo_push_hint = 0;
-
if (unlikely(vertex->need_conversion))
nv50->vbo_fifo = ~0;
else
@@ -487,7 +478,7 @@ nv50_draw_arrays(struct nv50_context *nv50,
BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, 0);
if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {
- BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+ BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
PUSH_DATA (push, 0);
}
nv50->state.index_bias = 0;
@@ -613,7 +604,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, index_bias);
if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {
- BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+ BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
PUSH_DATA (push, index_bias);
}
nv50->state.index_bias = index_bias;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
index 76f1b41ea70..68002305d72 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define SUBC_3D(m) 3, (m)
#define NV50_3D(n) SUBC_3D(NV50_3D_##n)
+#define NV84_3D(n) SUBC_3D(NV84_3D_##n)
#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n)
#define SUBC_2D(m) 4, (m)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 82ed5a1864e..162661ff2a7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -180,9 +180,10 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
+ unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ if (bind & PIPE_BIND_RENDER_TARGET) {
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
if (nvc0->framebuffer.cbufs[i] &&
nvc0->framebuffer.cbufs[i]->texture == res) {
@@ -193,7 +194,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
- if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.zsbuf->texture == res) {
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -203,12 +204,12 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
- if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_COMMAND_ARGS_BUFFER |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (bind & (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_COMMAND_ARGS_BUFFER |
+ PIPE_BIND_SAMPLER_VIEW)) {
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
if (nvc0->vtxbuf[i].buffer == res) {
nvc0->dirty |= NVC0_NEW_ARRAYS;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f53921092a5..d992b10a23c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+ info->flags = 0;
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
if (id < num_sw_queries)
@@ -200,7 +201,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
if (id == NVC0_HW_SM_QUERY_GROUP) {
if (screen->compute) {
info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
/* Because we can't expose the number of hardware counters needed for
* each different query, we don't want to allow more than one active
@@ -224,7 +224,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
if (screen->compute) {
if (screen->base.class_3d < NVE4_3D_CLASS) {
info->name = "Performance metrics";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
info->max_active_queries = 1;
info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
return 1;
@@ -234,7 +233,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
info->name = "Driver statistics";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
return 1;
@@ -245,7 +243,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
info->name = "this_is_not_the_query_group_you_are_looking_for";
info->max_active_queries = 0;
info->num_queries = 0;
- info->type = 0;
return 0;
}
@@ -260,4 +257,5 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
pipe->end_query = nvc0_end_query;
pipe->get_query_result = nvc0_get_query_result;
pipe->render_condition = nvc0_render_condition;
+ nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 44b222e5134..7962143d45a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -1014,14 +1014,15 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
prog->type = PIPE_SHADER_COMPUTE;
prog->translated = true;
- prog->num_gprs = 14;
prog->parm_size = 12;
if (is_nve4) {
prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
+ prog->num_gprs = 14;
} else {
prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
+ prog->num_gprs = 12;
}
screen->pm.prog = prog;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index cdb1fc1145f..6a4ae5be2ab 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -341,12 +341,16 @@ nvc0_clear_render_target(struct pipe_context *pipe,
nvc0_resource_fence(res, NOUVEAU_BO_WR);
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, 0x3c |
(z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
@@ -470,6 +474,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
if (width * height != elements) {
@@ -486,6 +492,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -545,12 +553,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
PUSH_DATA (push, dst->u.tex.first_layer);
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, mode |
(z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}