diff options
Diffstat (limited to 'src/gallium/drivers/nouveau')
30 files changed, 2026 insertions, 55 deletions
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 83f81135590..31a93659647 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -64,6 +64,8 @@ NV50_C_SOURCES := \ nv50/nv50_3ddefs.xml.h \ nv50/nv50_3d.xml.h \ nv50/nv50_blit.h \ + nv50/nv50_compute.c \ + nv50/nv50_compute.xml.h \ nv50/nv50_context.c \ nv50/nv50_context.h \ nv50/nv50_defs.xml.h \ @@ -76,6 +78,10 @@ NV50_C_SOURCES := \ nv50/nv50_query.h \ nv50/nv50_query_hw.c \ nv50/nv50_query_hw.h \ + nv50/nv50_query_hw_metric.c \ + nv50/nv50_query_hw_metric.h \ + nv50/nv50_query_hw_sm.c \ + nv50/nv50_query_hw_sm.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 2a13e1086a0..9f84de03a4a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -2357,6 +2357,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) case OP_PFETCH: emitPFETCH(insn); break; + case OP_AFETCH: + emitAFETCH(insn); + break; case OP_EMIT: case OP_RESTART: emitOUT(insn); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 7859c8e79bd..41d2cc9167c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -1573,10 +1573,28 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval) Instruction *st; if (slot->reg.file == FILE_MEMORY_LOCAL) { - st = new_Instruction(func, OP_STORE, ty); - st->setSrc(0, slot); - st->setSrc(1, lval); lval->noSpill = 1; + if (ty != TYPE_B96) { + st = new_Instruction(func, OP_STORE, ty); + st->setSrc(0, slot); + st->setSrc(1, lval); + } else { + st = new_Instruction(func, OP_SPLIT, ty); + st->setSrc(0, lval); + for (int d = 0; d < lval->reg.size / 4; ++d) + st->setDef(d, new_LValue(func, FILE_GPR)); + + for (int d = lval->reg.size / 4 - 1; d >= 0; --d) { + Value *tmp = cloneShallow(func, slot); + tmp->reg.size = 4; + tmp->reg.data.offset += 4 * d; + + Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32); + s->setSrc(0, tmp); + s->setSrc(1, st->getDef(d)); + defi->bb->insertAfter(defi, s); + } + } } else { st = new_Instruction(func, OP_CVT, ty); st->setDef(0, slot); @@ -1596,7 +1614,27 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot) Instruction *ld; if (slot->reg.file == FILE_MEMORY_LOCAL) { lval->noSpill = 1; - ld = new_Instruction(func, OP_LOAD, ty); + if (ty != TYPE_B96) { + ld = new_Instruction(func, OP_LOAD, ty); + } else { + ld = new_Instruction(func, OP_MERGE, ty); + for (int d = 0; d < lval->reg.size / 4; ++d) { + Value *tmp = cloneShallow(func, slot); + LValue *val; + tmp->reg.size = 4; + tmp->reg.data.offset += 4 * d; + + Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32); + l->setDef(0, (val = new_LValue(func, FILE_GPR))); + l->setSrc(0, tmp); + usei->bb->insertBefore(usei, l); + ld->setSrc(d, val); + val->noSpill = 1; + } + ld->setDef(0, lval); + usei->bb->insertBefore(usei, ld); + return lval; + } } else { ld = new_Instruction(func, OP_CVT, ty); } diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index 68e69beb08f..1695553d793 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -657,8 +657,8 @@ nouveau_buffer_create(struct pipe_screen *pscreen, if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | PIPE_RESOURCE_FLAG_MAP_COHERENT)) { buffer->domain = NOUVEAU_BO_GART; - } else if (buffer->base.bind & - (screen->vidmem_bindings & screen->sysmem_bindings)) { + } else if (buffer->base.bind == 0 || (buffer->base.bind & + (screen->vidmem_bindings & screen->sysmem_bindings))) { switch (buffer->base.usage) { case PIPE_USAGE_DEFAULT: case PIPE_USAGE_IMMUTABLE: @@ -685,6 +685,10 @@ nouveau_buffer_create(struct pipe_screen *pscreen, if (buffer->base.bind & screen->sysmem_bindings) buffer->domain = NOUVEAU_BO_GART; } + /* There can be very special situations where we want non-gpu-mapped + * buffers, but never through this interface. + */ + assert(buffer->domain); ret = nouveau_buffer_allocate(screen, buffer, buffer->domain); if (ret == false) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c new file mode 100644 index 00000000000..6d23fd66945 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -0,0 +1,320 @@ +/* + * Copyright 2012 Francisco Jerez + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_compute.xml.h" + +#include "codegen/nv50_ir_driver.h" + +int +nv50_screen_compute_setup(struct nv50_screen *screen, + struct nouveau_pushbuf *push) +{ + struct nouveau_device *dev = screen->base.device; + struct nouveau_object *chan = screen->base.channel; + struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data; + unsigned obj_class; + int i, ret; + + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + obj_class = NV50_COMPUTE_CLASS; + break; + case 0xa0: + switch (dev->chipset) { + case 0xa3: + case 0xa5: + case 0xa8: + obj_class = NVA3_COMPUTE_CLASS; + break; + default: + obj_class = NV50_COMPUTE_CLASS; + break; + } + break; + default: + NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); + return -1; + } + + ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0, + &screen->compute); + if (ret) + return ret; + + BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->compute->handle); + + BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->stack_bo->offset); + PUSH_DATA (push, screen->stack_bo->offset); + BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1); + PUSH_DATA (push, 4); + + BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1); + PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED); + BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1); + PUSH_DATA (push, 0x100); + BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1); + PUSH_DATA (push, fifo->vram); + + for (i = 0; i < 15; i++) { + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + } + + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1); + PUSH_DATA (push, ~0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1); + PUSH_DATA (push, 7); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1); + PUSH_DATA (push, 7); + BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1); + PUSH_DATA (push, 0x54); + BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1); + PUSH_DATA (push, fifo->vram); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->tls_bo->offset + 65536); + PUSH_DATA (push, screen->tls_bo->offset + 65536); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1); + PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); + + return 0; +} + +static bool +nv50_compute_validate_program(struct nv50_context *nv50) +{ + struct nv50_program *prog = nv50->compprog; + + if (prog->mem) + return true; + + if (!prog->translated) { + prog->translated = nv50_program_translate( + prog, nv50->screen->base.device->chipset, &nv50->base.debug); + if (!prog->translated) + return false; + } + if (unlikely(!prog->code_size)) + return false; + + if (likely(prog->code_size)) { + if (nv50_program_upload_code(nv50, prog)) { + struct nouveau_pushbuf *push = nv50->base.pushbuf; + BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1); + PUSH_DATA (push, 0); + return true; + } + } + return false; +} + +static void +nv50_compute_validate_globals(struct nv50_context *nv50) +{ + unsigned i; + + for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource *res = *util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, i); + if (res) + nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL, + nv04_resource(res), NOUVEAU_BO_RDWR); + } +} + +static bool +nv50_compute_state_validate(struct nv50_context *nv50) +{ + if (!nv50_compute_validate_program(nv50)) + return false; + + if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS) + nv50_compute_validate_globals(nv50); + + /* TODO: validate textures, samplers, surfaces */ + + nv50_bufctx_fence(nv50->bufctx_cp, false); + + nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp); + if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf))) + return false; + if (unlikely(nv50->state.flushed)) + nv50_bufctx_fence(nv50->bufctx_cp, true); + + return true; +} + +static void +nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_pushbuf *push = screen->base.pushbuf; + unsigned size = align(nv50->compprog->parm_size, 0x4); + + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + PUSH_DATA (push, (size / 4) << 8); + + if (size) { + struct nouveau_mm_allocation *mm; + struct nouveau_bo *bo = NULL; + unsigned offset; + + mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset); + assert(mm); + + nouveau_bo_map(bo, 0, screen->base.client); + memcpy(bo->map + offset, input, size); + + nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + nouveau_pushbuf_bufctx(push, nv50->bufctx); + nouveau_pushbuf_validate(push); + + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4); + nouveau_pushbuf_data(push, bo, offset, size); + + nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm); + nouveau_bo_ref(NULL, &bo); + nouveau_bufctx_reset(nv50->bufctx, 0); + } +} + +static uint32_t +nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label) +{ + struct nv50_program *prog = nv50->compprog; + const struct nv50_ir_prog_symbol *syms = + (const struct nv50_ir_prog_symbol *)prog->cp.syms; + unsigned i; + + for (i = 0; i < prog->cp.num_syms; ++i) { + if (syms[i].label == label) + return prog->code_base + syms[i].offset; + } + return prog->code_base; /* no symbols or symbol not found */ +} + +void +nv50_launch_grid(struct pipe_context *pipe, + const uint *block_layout, const uint *grid_layout, + uint32_t label, const void *input) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2]; + struct nv50_program *cp = nv50->compprog; + bool ret; + + ret = !nv50_compute_state_validate(nv50); + if (ret) { + NOUVEAU_ERR("Failed to launch grid !\n"); + return; + } + + nv50_compute_upload_input(nv50, input); + + BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1); + PUSH_DATA (push, nv50_compute_find_symbol(nv50, label)); + + BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1); + PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40)); + BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1); + PUSH_DATA (push, cp->max_gpr); + + /* grid/block setup */ + BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2); + PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]); + PUSH_DATA (push, block_layout[2]); + BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1); + PUSH_DATA (push, 1 << 16 | block_size); + BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1); + PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]); + BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1); + PUSH_DATA (push, 1); + + /* kernel launching */ + BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + + /* bind a compute shader clobbers fragment shader state */ + nv50->dirty |= NV50_NEW_FRAGPROG; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h new file mode 100644 index 00000000000..268d11253b6 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h @@ -0,0 +1,444 @@ +#ifndef NV50_COMPUTE_XML +#define NV50_COMPUTE_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/envytools/envytools/ +git clone https://github.com/envytools/envytools.git + +The rules-ng-ng source files this header was generated from are: +- rnndb/graph/g80_compute.xml ( 14027 bytes, from 2015-02-14 02:01:36) +- rnndb/copyright.xml ( 6456 bytes, from 2015-02-14 02:01:36) +- rnndb/nvchipsets.xml ( 2833 bytes, from 2015-04-28 16:28:33) +- rnndb/fifo/nv_object.xml ( 15390 bytes, from 2015-04-22 20:36:09) +- rnndb/g80_defs.xml ( 18210 bytes, from 2015-10-19 20:49:59) + +Copyright (C) 2006-2015 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- Ilia Mirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NV50_COMPUTE_DMA_NOTIFY 0x00000180 + +#define NV50_COMPUTE_DMA_GLOBAL 0x000001a0 + +#define NV50_COMPUTE_DMA_QUERY 0x000001a4 + +#define NV50_COMPUTE_DMA_LOCAL 0x000001b8 + +#define NV50_COMPUTE_DMA_STACK 0x000001bc + +#define NV50_COMPUTE_DMA_CODE_CB 0x000001c0 + +#define NV50_COMPUTE_DMA_TSC 0x000001c4 + +#define NV50_COMPUTE_DMA_TIC 0x000001c8 + +#define NV50_COMPUTE_DMA_TEXTURE 0x000001cc + +#define NV50_COMPUTE_UNK0200 0x00000200 +#define NV50_COMPUTE_UNK0200_UNK1__MASK 0x0000ffff +#define NV50_COMPUTE_UNK0200_UNK1__SHIFT 0 +#define NV50_COMPUTE_UNK0200_UNK2__MASK 0x00ff0000 +#define NV50_COMPUTE_UNK0200_UNK2__SHIFT 16 + +#define NV50_COMPUTE_UNK0204 0x00000204 + +#define NV50_COMPUTE_UNK0208 0x00000208 + +#define NV50_COMPUTE_UNK020C 0x0000020c + +#define NV50_COMPUTE_CP_ADDRESS_HIGH 0x00000210 + +#define NV50_COMPUTE_CP_ADDRESS_LOW 0x00000214 + +#define NV50_COMPUTE_STACK_ADDRESS_HIGH 0x00000218 + +#define NV50_COMPUTE_STACK_ADDRESS_LOW 0x0000021c + +#define NV50_COMPUTE_STACK_SIZE_LOG 0x00000220 + +#define NV50_COMPUTE_CALL_LIMIT_LOG 0x00000224 + +#define NV50_COMPUTE_UNK0228 0x00000228 +#define NV50_COMPUTE_UNK0228_UNK0 0x00000001 +#define NV50_COMPUTE_UNK0228_UNK4__MASK 0x00000ff0 +#define NV50_COMPUTE_UNK0228_UNK4__SHIFT 4 +#define NV50_COMPUTE_UNK0228_UNK12__MASK 0x000ff000 +#define NV50_COMPUTE_UNK0228_UNK12__SHIFT 12 + +#define NV50_COMPUTE_TSC_ADDRESS_HIGH 0x0000022c + +#define NV50_COMPUTE_TSC_ADDRESS_LOW 0x00000230 +#define NV50_COMPUTE_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NV50_COMPUTE_TSC_LIMIT 0x00000234 +#define NV50_COMPUTE_TSC_LIMIT__MAX 0x00001fff + +#define NV50_COMPUTE_CB_ADDR 0x00000238 +#define NV50_COMPUTE_CB_ADDR_ID__MASK 0x003fff00 +#define NV50_COMPUTE_CB_ADDR_ID__SHIFT 8 +#define NV50_COMPUTE_CB_ADDR_BUFFER__MASK 0x0000007f +#define NV50_COMPUTE_CB_ADDR_BUFFER__SHIFT 0 + +#define NV50_COMPUTE_CB_DATA(i0) (0x0000023c + 0x4*(i0)) +#define NV50_COMPUTE_CB_DATA__ESIZE 0x00000004 +#define NV50_COMPUTE_CB_DATA__LEN 0x00000010 + +#define NV50_COMPUTE_TSC_FLUSH 0x0000027c +#define NV50_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001 +#define NV50_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_COMPUTE_TIC_FLUSH 0x00000280 +#define NV50_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001 +#define NV50_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_COMPUTE_DELAY1 0x00000284 + +#define NV50_COMPUTE_WATCHDOG_TIMER 0x00000288 + +#define NV50_COMPUTE_DELAY2 0x0000028c + +#define NV50_COMPUTE_UNK0290 0x00000290 + +#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH 0x00000294 + +#define NV50_COMPUTE_LOCAL_ADDRESS_LOW 0x00000298 +#define NV50_COMPUTE_LOCAL_ADDRESS_LOW__ALIGN 0x00000100 + +#define NV50_COMPUTE_LOCAL_SIZE_LOG 0x0000029c + +#define NV50_COMPUTE_UNK02A0 0x000002a0 + +#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH 0x000002a4 + +#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW 0x000002a8 + +#define NV50_COMPUTE_CB_DEF_SET 0x000002ac +#define NV50_COMPUTE_CB_DEF_SET_SIZE__MASK 0x0000ffff +#define NV50_COMPUTE_CB_DEF_SET_SIZE__SHIFT 0 +#define NV50_COMPUTE_CB_DEF_SET_BUFFER__MASK 0x007f0000 +#define NV50_COMPUTE_CB_DEF_SET_BUFFER__SHIFT 16 + +#define NV50_COMPUTE_UNK02B0 0x000002b0 + +#define NV50_COMPUTE_BLOCK_ALLOC 0x000002b4 +#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__MASK 0x0000ffff +#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__SHIFT 0 +#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__MASK 0x00ff0000 +#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__SHIFT 16 + +#define NV50_COMPUTE_LANES32_ENABLE 0x000002b8 + +#define NV50_COMPUTE_UNK02BC 0x000002bc +#define NV50_COMPUTE_UNK02BC_UNK1__MASK 0x00000007 +#define NV50_COMPUTE_UNK02BC_UNK1__SHIFT 0 +#define NV50_COMPUTE_UNK02BC_UNK2__MASK 0x00000070 +#define NV50_COMPUTE_UNK02BC_UNK2__SHIFT 4 + +#define NV50_COMPUTE_CP_REG_ALLOC_TEMP 0x000002c0 + +#define NV50_COMPUTE_TIC_ADDRESS_HIGH 0x000002c4 + +#define NV50_COMPUTE_TIC_ADDRESS_LOW 0x000002c8 + +#define NV50_COMPUTE_TIC_LIMIT 0x000002cc + +#define NV50_COMPUTE_MP_PM_SET(i0) (0x000002d0 + 0x4*(i0)) +#define NV50_COMPUTE_MP_PM_SET__ESIZE 0x00000004 +#define NV50_COMPUTE_MP_PM_SET__LEN 0x00000004 + +#define NV50_COMPUTE_MP_PM_CONTROL(i0) (0x000002e0 + 0x4*(i0)) +#define NV50_COMPUTE_MP_PM_CONTROL__ESIZE 0x00000004 +#define NV50_COMPUTE_MP_PM_CONTROL__LEN 0x00000004 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE__MASK 0x00000001 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE__SHIFT 0 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP 0x00000000 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP_PULSE 0x00000001 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__MASK 0x00000070 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__SHIFT 4 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK0 0x00000000 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK1 0x00000010 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK2 0x00000020 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK3 0x00000030 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK4 0x00000040 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK5 0x00000050 +#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__MASK 0x00ffff00 +#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__SHIFT 8 +#define NV50_COMPUTE_MP_PM_CONTROL_SIG__MASK 0xff000000 +#define NV50_COMPUTE_MP_PM_CONTROL_SIG__SHIFT 24 + +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE 0x000002f0 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_0 0x00000001 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_1 0x00000002 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_2 0x00000004 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_3 0x00000008 + +#define NV50_COMPUTE_UNK02F4 0x000002f4 + +#define NV50_COMPUTE_BLOCKDIM_LATCH 0x000002f8 + +#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC 0x000002fc + +#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP 0x00000300 + +#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC 0x00000304 + +#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP 0x00000308 + +#define NV50_COMPUTE_UNK030C 0x0000030c + +#define NV50_COMPUTE_QUERY_ADDRESS_HIGH 0x00000310 + +#define NV50_COMPUTE_QUERY_ADDRESS_LOW 0x00000314 + +#define NV50_COMPUTE_QUERY_SEQUENCE 0x00000318 + +#define NV50_COMPUTE_QUERY_GET 0x0000031c +#define NV50_COMPUTE_QUERY_GET_INTR 0x00000200 +#define NV50_COMPUTE_QUERY_GET_SHORT 0x00008000 + +#define NV50_COMPUTE_COND_ADDRESS_HIGH 0x00000320 + +#define NV50_COMPUTE_COND_ADDRESS_LOW 0x00000324 + +#define NV50_COMPUTE_COND_MODE 0x00000328 +#define NV50_COMPUTE_COND_MODE_NEVER 0x00000000 +#define NV50_COMPUTE_COND_MODE_ALWAYS 0x00000001 +#define NV50_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002 +#define NV50_COMPUTE_COND_MODE_EQUAL 0x00000003 +#define NV50_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004 + +#define NV50_COMPUTE_UNK032C 0x0000032c + +#define NV50_COMPUTE_UNK0330 0x00000330 + +#define NV50_COMPUTE_UNK0334(i0) (0x00000334 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0334__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0334__LEN 0x00000003 + +#define NV50_COMPUTE_UNK0340(i0) (0x00000340 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0340__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0340__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0348(i0) (0x00000348 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0348__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0348__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0350(i0) (0x00000350 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0350__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0350__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0358 0x00000358 + +#define NV50_COMPUTE_UNK035C 0x0000035c + +#define NV50_COMPUTE_UNK0360 0x00000360 +#define NV50_COMPUTE_UNK0360_UNK0__MASK 0x000000f0 +#define NV50_COMPUTE_UNK0360_UNK0__SHIFT 4 +#define NV50_COMPUTE_UNK0360_UNK1__MASK 0x00000f00 +#define NV50_COMPUTE_UNK0360_UNK1__SHIFT 8 + +#define NV50_COMPUTE_UNK0364 0x00000364 + +#define NV50_COMPUTE_LAUNCH 0x00000368 + +#define NV50_COMPUTE_UNK036C 0x0000036c + +#define NV50_COMPUTE_UNK0370 0x00000370 + +#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374 +#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__MASK 0x000000ff +#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__SHIFT 0 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MASK 0x0000ff00 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__SHIFT 8 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MAX 0x00000040 + +#define NV50_COMPUTE_LINKED_TSC 0x00000378 + +#define NV50_COMPUTE_UNK037C 0x0000037c +#define NV50_COMPUTE_UNK037C_ALWAYS_DERIV 0x00000001 +#define NV50_COMPUTE_UNK037C_UNK16 0x00010000 + +#define NV50_COMPUTE_CODE_CB_FLUSH 0x00000380 + +#define NV50_COMPUTE_UNK0384 0x00000384 + +#define NV50_COMPUTE_GRIDID 0x00000388 + +#define NV50_COMPUTE_UNK038C(i0) (0x0000038c + 0x4*(i0)) +#define NV50_COMPUTE_UNK038C__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK038C__LEN 0x00000003 + +#define NV50_COMPUTE_WRCACHE_FLUSH 0x00000398 + +#define NV50_COMPUTE_UNK039C(i0) (0x0000039c + 0x4*(i0)) +#define NV50_COMPUTE_UNK039C__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK039C__LEN 0x00000002 + +#define NV50_COMPUTE_GRIDDIM 0x000003a4 +#define NV50_COMPUTE_GRIDDIM_X__MASK 0x0000ffff +#define NV50_COMPUTE_GRIDDIM_X__SHIFT 0 +#define NV50_COMPUTE_GRIDDIM_Y__MASK 0xffff0000 +#define NV50_COMPUTE_GRIDDIM_Y__SHIFT 16 + +#define NV50_COMPUTE_SHARED_SIZE 0x000003a8 +#define NV50_COMPUTE_SHARED_SIZE__MAX 0x00004000 +#define NV50_COMPUTE_SHARED_SIZE__ALIGN 0x00000040 + +#define NV50_COMPUTE_BLOCKDIM_XY 0x000003ac +#define NV50_COMPUTE_BLOCKDIM_XY_X__MASK 0x0000ffff +#define NV50_COMPUTE_BLOCKDIM_XY_X__SHIFT 0 +#define NV50_COMPUTE_BLOCKDIM_XY_Y__MASK 0xffff0000 +#define NV50_COMPUTE_BLOCKDIM_XY_Y__SHIFT 16 + +#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0 +#define NV50_COMPUTE_BLOCKDIM_Z__MIN 0x00000001 +#define NV50_COMPUTE_BLOCKDIM_Z__MAX 0x00000040 + +#define NV50_COMPUTE_CP_START_ID 0x000003b4 + +#define NV50_COMPUTE_REG_MODE 0x000003b8 +#define NV50_COMPUTE_REG_MODE_PACKED 0x00000001 +#define NV50_COMPUTE_REG_MODE_STRIPED 0x00000002 + +#define NV50_COMPUTE_TEX_LIMITS 0x000003bc +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0 +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000 +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007 + +#define NV50_COMPUTE_BIND_TSC 0x000003c0 +#define NV50_COMPUTE_BIND_TSC_VALID 0x00000001 +#define NV50_COMPUTE_BIND_TSC_SAMPLER__MASK 0x000000f0 +#define NV50_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4 +#define NV50_COMPUTE_BIND_TSC_TSC__MASK 0x001ff000 +#define NV50_COMPUTE_BIND_TSC_TSC__SHIFT 12 + +#define NV50_COMPUTE_BIND_TIC 0x000003c4 +#define NV50_COMPUTE_BIND_TIC_VALID 0x00000001 +#define NV50_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NV50_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1 +#define NV50_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NV50_COMPUTE_BIND_TIC_TIC__SHIFT 9 + +#define NV50_COMPUTE_SET_PROGRAM_CB 0x000003c8 +#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__MASK 0x00000f00 +#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__SHIFT 8 +#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000 +#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__SHIFT 12 +#define NV50_COMPUTE_SET_PROGRAM_CB_VALID 0x000000ff + +#define NV50_COMPUTE_UNK03CC 0x000003cc + +#define NV50_COMPUTE_TEX_CACHE_CTL 0x000003d0 +#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NV50_COMPUTE_UNK03D4 0x000003d4 + +#define NV50_COMPUTE_UNK03D8 0x000003d8 + +#define NV50_COMPUTE_UNK03DC 0x000003dc + +#define NV50_COMPUTE_UNK03E0 0x000003e0 + +#define NV50_COMPUTE_UNK03E4 0x000003e4 + +#define NVA3_COMPUTE_TEX_MISC 0x000003e8 +#define NVA3_COMPUTE_TEX_MISC_UNK1 0x00000001 +#define NVA3_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002 + +#define NV50_COMPUTE_GLOBAL(i0) (0x00000400 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL__ESIZE 0x00000020 +#define NV50_COMPUTE_GLOBAL__LEN 0x00000010 + +#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(i0) (0x00000400 + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(i0) (0x00000404 + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_PITCH(i0) (0x00000408 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL_PITCH__MAX 0x00800000 +#define NV50_COMPUTE_GLOBAL_PITCH__ALIGN 0x00000100 + +#define NV50_COMPUTE_GLOBAL_LIMIT(i0) (0x0000040c + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_MODE(i0) (0x00000410 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL_MODE_LINEAR 0x00000001 +#define NV50_COMPUTE_GLOBAL_MODE_UNK1__MASK 0x000000f0 +#define NV50_COMPUTE_GLOBAL_MODE_UNK1__SHIFT 4 +#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__MASK 0x00000f00 +#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__SHIFT 8 + +#define NV50_COMPUTE_USER_PARAM(i0) (0x00000600 + 0x4*(i0)) +#define NV50_COMPUTE_USER_PARAM__ESIZE 0x00000004 +#define NV50_COMPUTE_USER_PARAM__LEN 0x00000040 + +#define NV50_COMPUTE_UNK0700(i0) (0x00000700 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0700__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0700__LEN 0x00000010 + + +#endif /* NV50_COMPUTE_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index 7867c2df7f3..4874b77b1e1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -113,6 +113,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50) nouveau_bufctx_del(&nv50->bufctx_3d); nouveau_bufctx_del(&nv50->bufctx); + nouveau_bufctx_del(&nv50->bufctx_cp); util_unreference_framebuffer_state(&nv50->framebuffer); @@ -131,6 +132,14 @@ nv50_context_unreference_resources(struct nv50_context *nv50) if (!nv50->constbuf[s][i].user) pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL); } + + for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource **res = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, i); + pipe_resource_reference(res, NULL); + } + util_dynarray_fini(&nv50->global_residents); } static void @@ -159,9 +168,10 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, int ref) { struct nv50_context *nv50 = nv50_context(&ctx->pipe); + unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER; unsigned s, i; - if (res->bind & PIPE_BIND_RENDER_TARGET) { + if (bind & PIPE_BIND_RENDER_TARGET) { assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS); for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) { if (nv50->framebuffer.cbufs[i] && @@ -173,7 +183,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, } } } - if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (bind & PIPE_BIND_DEPTH_STENCIL) { if (nv50->framebuffer.zsbuf && nv50->framebuffer.zsbuf->texture == res) { nv50->dirty |= NV50_NEW_FRAMEBUFFER; @@ -183,11 +193,11 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, } } - if (res->bind & (PIPE_BIND_VERTEX_BUFFER | - PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_CONSTANT_BUFFER | - PIPE_BIND_STREAM_OUTPUT | - PIPE_BIND_SAMPLER_VIEW)) { + if (bind & (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER | + PIPE_BIND_STREAM_OUTPUT | + PIPE_BIND_SAMPLER_VIEW)) { assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS); for (i = 0; i < nv50->num_vtxbufs; ++i) { @@ -263,10 +273,13 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) nv50->base.pushbuf = screen->base.pushbuf; nv50->base.client = screen->base.client; - ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT, - &nv50->bufctx_3d); + ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx); + if (!ret) + ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_3D_COUNT, + &nv50->bufctx_3d); if (!ret) - ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx); + ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_CP_COUNT, + &nv50->bufctx_cp); if (ret) goto out_err; @@ -290,6 +303,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) pipe->draw_vbo = nv50_draw_vbo; pipe->clear = nv50_clear; + pipe->launch_grid = nv50_launch_grid; pipe->flush = nv50_flush; pipe->texture_barrier = nv50_texture_barrier; @@ -335,19 +349,30 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms); BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc); BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo); + if (screen->compute) { + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code); + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc); + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo); + } flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo); BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo); + if (screen->compute) + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); nv50->base.scratch.bo_size = 2 << 20; + util_dynarray_init(&nv50->global_residents); + return pipe; out_err: if (nv50->bufctx_3d) nouveau_bufctx_del(&nv50->bufctx_3d); + if (nv50->bufctx_cp) + nouveau_bufctx_del(&nv50->bufctx_cp); if (nv50->bufctx) nouveau_bufctx_del(&nv50->bufctx); FREE(nv50->blit); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index fb74a9748a3..2cebcd99423 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -49,6 +49,10 @@ #define NV50_NEW_MIN_SAMPLES (1 << 22) #define NV50_NEW_CONTEXT (1 << 31) +#define NV50_NEW_CP_PROGRAM (1 << 0) +#define NV50_NEW_CP_GLOBALS (1 << 1) + +/* 3d bufctx (during draw_vbo, blit_3d) */ #define NV50_BIND_FB 0 #define NV50_BIND_VERTEX 1 #define NV50_BIND_VERTEX_TMP 2 @@ -58,7 +62,15 @@ #define NV50_BIND_SO 53 #define NV50_BIND_SCREEN 54 #define NV50_BIND_TLS 55 -#define NV50_BIND_COUNT 56 +#define NV50_BIND_3D_COUNT 56 + +/* compute bufctx (during launch_grid) */ +#define NV50_BIND_CP_GLOBAL 0 +#define NV50_BIND_CP_SCREEN 1 +#define NV50_BIND_CP_QUERY 2 +#define NV50_BIND_CP_COUNT 3 + +/* bufctx for other operations */ #define NV50_BIND_2D 0 #define NV50_BIND_M2MF 0 #define NV50_BIND_FENCE 1 @@ -101,8 +113,10 @@ struct nv50_context { struct nouveau_bufctx *bufctx_3d; struct nouveau_bufctx *bufctx; + struct nouveau_bufctx *bufctx_cp; uint32_t dirty; + uint32_t dirty_cp; /* dirty flags for compute state */ bool cb_dirty; struct nv50_graph_state state; @@ -115,6 +129,7 @@ struct nv50_context { struct nv50_program *vertprog; struct nv50_program *gmtyprog; struct nv50_program *fragprog; + struct nv50_program *compprog; struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; uint16_t constbuf_dirty[3]; @@ -163,6 +178,8 @@ struct nv50_context { uint32_t cond_condmode; /* the calculated condition */ struct nv50_blitctx *blit; + + struct util_dynarray global_residents; }; static inline struct nv50_context * @@ -302,4 +319,9 @@ struct pipe_video_buffer * nv98_video_buffer_create(struct pipe_context *pipe, const struct pipe_video_buffer *template); +/* nv50_compute.c */ +void +nv50_launch_grid(struct pipe_context *, const uint *, const uint *, + uint32_t, const void *); + #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 89e7a338283..a4b8ddfda95 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -66,7 +66,6 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_VERTEXID: prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; - prog->vp.vertexid = 1; continue; default: break; @@ -259,6 +258,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) return nv50_vertprog_assign_slots(info); case PIPE_SHADER_FRAGMENT: return nv50_fragprog_assign_slots(info); + case PIPE_SHADER_COMPUTE: + return 0; default: return -1; } @@ -355,6 +356,9 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, prog->gp.has_layer = 0; prog->gp.has_viewport = 0; + if (prog->type == PIPE_SHADER_COMPUTE) + info->prop.cp.inputOffset = 0x10; + info->driverPriv = prog; #ifdef DEBUG @@ -378,6 +382,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); prog->tls_space = info->bin.tlsSpace; + prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; + if (prog->type == PIPE_SHADER_FRAGMENT) { if (info->prop.fp.writesDepth) { prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; @@ -401,6 +407,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, break; } prog->gp.vert_count = info->prop.gp.maxVertices; + } else + if (prog->type == PIPE_SHADER_COMPUTE) { + prog->cp.syms = info->bin.syms; + prog->cp.num_syms = info->bin.numSyms; } if (prog->pipe.stream_output.num_outputs) @@ -423,11 +433,13 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) struct nouveau_heap *heap; int ret; uint32_t size = align(prog->code_size, 0x40); + uint8_t prog_type; switch (prog->type) { case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break; case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break; + case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break; default: assert(!"invalid program type"); return false; @@ -450,7 +462,14 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) return false; } } - prog->code_base = prog->mem->start; + + if (prog->type == PIPE_SHADER_COMPUTE) { + /* CP code must be uploaded in FP code segment. */ + prog_type = 1; + } else { + prog->code_base = prog->mem->start; + prog_type = prog->type; + } ret = nv50_tls_realloc(nv50->screen, prog->tls_space); if (ret < 0) { @@ -468,7 +487,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) false /* flatshade */); nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, - (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, + (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, NOUVEAU_BO_VRAM, prog->code_size, prog->code); BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); @@ -489,7 +508,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) FREE(p->code); FREE(p->fixups); - + FREE(p->interps); FREE(p->so); memset(p, 0, sizeof(*p)); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index 7a33eb11d6d..1de5122a56e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -76,9 +76,9 @@ struct nv50_program { ubyte psiz; /* output slot of point size */ ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */ ubyte edgeflag; - ubyte vertexid; ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */ ubyte clpd_nr; + bool need_vertex_id; } vp; struct { @@ -98,6 +98,13 @@ struct nv50_program { ubyte viewportid; /* hw value of viewport index output */ } gp; + struct { + uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */ + uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */ + void *syms; + unsigned num_syms; + } cp; + void *fixups; /* relocation records */ void *interps; /* interpolation records */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c index f31eaa0e314..cbef95d07f6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_push.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c @@ -24,6 +24,10 @@ struct push_context { struct translate *translate; bool primitive_restart; + + bool need_vertex_id; + int32_t index_bias; + uint32_t prim; uint32_t restart_index; uint32_t instance_id; @@ -74,6 +78,11 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -107,6 +116,11 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -140,6 +154,11 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) size = ctx->vertex_words * nr; + if (unlikely(ctx->need_vertex_id)) { + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, *elts + ctx->index_bias); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id, @@ -161,10 +180,18 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) static void emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) { + uint32_t elts = 0; + while (count) { unsigned push = MIN2(count, ctx->packet_vertex_limit); unsigned size = ctx->vertex_words * push; + if (unlikely(ctx->need_vertex_id)) { + /* For non-indexed draws, gl_VertexID goes up after each vertex. */ + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx->push, elts++); + } + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size); ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id, @@ -216,7 +243,14 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.push = nv50->base.pushbuf; ctx.translate = nv50->vertex->translate; - ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit; + + ctx.need_vertex_id = nv50->screen->base.class_3d >= NV84_3D_CLASS && + nv50->vertprog->vp.need_vertex_id && (nv50->vertex->num_elements < 32); + ctx.index_bias = info->index_bias; + + /* For indexed draws, gl_VertexID must be emitted for every vertex. */ + ctx.packet_vertex_limit = + ctx.need_vertex_id ? 1 : nv50->vertex->packet_vertex_limit; ctx.vertex_words = nv50->vertex->vertex_size; assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS); @@ -307,4 +341,10 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.instance_id++; ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } + + if (unlikely(ctx.need_vertex_id)) { + /* Reset gl_VertexID to prevent future indexed draws to be confused. */ + BEGIN_NV04(ctx.push, NV84_3D(VERTEX_ID_BASE), 1); + PUSH_DATA (ctx.push, nv50->state.index_bias); + } } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index dd9b85b7208..4cd3b615606 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -27,6 +27,8 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query.h" #include "nv50/nv50_query_hw.h" +#include "nv50/nv50_query_hw_metric.h" +#include "nv50/nv50_query_hw_sm.h" static struct pipe_query * nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index) @@ -152,4 +154,79 @@ nv50_init_query_functions(struct nv50_context *nv50) pipe->end_query = nv50_end_query; pipe->get_query_result = nv50_get_query_result; pipe->render_condition = nv50_render_condition; + nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS; +} + +int +nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int num_hw_queries = 0; + + num_hw_queries = nv50_hw_get_driver_query_info(screen, 0, NULL); + + if (!info) + return num_hw_queries; + + /* Init default values. */ + info->name = "this_is_not_the_query_you_are_looking_for"; + info->query_type = 0xdeadd01d; + info->max_value.u64 = 0; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->group_id = -1; + info->flags = 0; + + return nv50_hw_get_driver_query_info(screen, id, info); +} + +int +nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_group_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int count = 0; + + if (screen->compute) + if (screen->base.class_3d >= NV84_3D_CLASS) + count += 2; + + if (!info) + return count; + + if (id == NV50_HW_SM_QUERY_GROUP) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = "MP counters"; + + /* Because we can't expose the number of hardware counters needed + * for each different query, we don't want to allow more than one + * active query simultaneously to avoid failure when the maximum + * number of counters is reached. Note that these groups of GPU + * counters are currently only used by AMD_performance_monitor. + */ + info->max_active_queries = 1; + info->num_queries = NV50_HW_SM_QUERY_COUNT; + return 1; + } + } + } else + if (id == NV50_HW_METRIC_QUERY_GROUP) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = "Performance metrics"; + info->max_active_queries = 1; + info->num_queries = NV50_HW_METRIC_QUERY_COUNT; + return 1; + } + } + } + + /* user asked for info about non-existing query group */ + info->name = "this_is_not_the_query_group_you_are_looking_for"; + info->max_active_queries = 0; + info->num_queries = 0; + return 0; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h index d990285c857..bd4c0a386f6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h @@ -28,6 +28,12 @@ nv50_query(struct pipe_query *pipe) return (struct nv50_query *)pipe; } +/* + * Driver queries groups: + */ +#define NV50_HW_SM_QUERY_GROUP 0 +#define NV50_HW_METRIC_QUERY_GROUP 1 + void nv50_init_query_functions(struct nv50_context *); #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index 945ce7abe50..b6ebbbf1010 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -25,6 +25,8 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query_hw.h" +#include "nv50/nv50_query_hw_metric.h" +#include "nv50/nv50_query_hw_sm.h" #include "nv_object.xml.h" #define NV50_HW_QUERY_STATE_READY 0 @@ -41,7 +43,7 @@ #define NV50_HW_QUERY_ALLOC_SPACE 256 -static bool +bool nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) { @@ -122,6 +124,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_hw_query *hq = nv50_hw_query(q); + if (hq->funcs && hq->funcs->begin_query) + return hq->funcs->begin_query(nv50, hq); + /* For occlusion queries we have to change the storage, because a previous * query might set the initial render condition to false even *after* we re- * initialized it to true. @@ -193,6 +198,11 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_hw_query *hq = nv50_hw_query(q); + if (hq->funcs && hq->funcs->end_query) { + hq->funcs->end_query(nv50, hq); + return; + } + hq->state = NV50_HW_QUERY_STATE_ENDED; switch (q->type) { @@ -261,6 +271,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, uint64_t *data64 = (uint64_t *)hq->data; int i; + if (hq->funcs && hq->funcs->get_query_result) + return hq->funcs->get_query_result(nv50, hq, wait, result); + if (hq->state != NV50_HW_QUERY_STATE_READY) nv50_hw_query_update(q); @@ -331,6 +344,18 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) struct nv50_hw_query *hq; struct nv50_query *q; + hq = nv50_hw_sm_create_query(nv50, type); + if (hq) { + hq->base.funcs = &hw_query_funcs; + return (struct nv50_query *)hq; + } + + hq = nv50_hw_metric_create_query(nv50, type); + if (hq) { + hq->base.funcs = &hw_query_funcs; + return (struct nv50_query *)hq; + } + hq = CALLOC_STRUCT(nv50_hw_query); if (!hq) return NULL; @@ -375,6 +400,26 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) return q; } +int +nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int num_hw_sm_queries = 0, num_hw_metric_queries = 0; + + num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL); + num_hw_metric_queries = + nv50_hw_metric_get_driver_query_info(screen, 0, NULL); + + if (!info) + return num_hw_sm_queries + num_hw_metric_queries; + + if (id < num_hw_sm_queries) + return nv50_hw_sm_get_driver_query_info(screen, id, info); + + return nv50_hw_metric_get_driver_query_info(screen, + id - num_hw_sm_queries, info); +} + void nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, struct nv50_query *q, unsigned result_offset) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h index 294c67de9a4..82ec6bd2d96 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -8,8 +8,19 @@ #define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) +struct nv50_hw_query; + +struct nv50_hw_query_funcs { + void (*destroy_query)(struct nv50_context *, struct nv50_hw_query *); + boolean (*begin_query)(struct nv50_context *, struct nv50_hw_query *); + void (*end_query)(struct nv50_context *, struct nv50_hw_query *); + boolean (*get_query_result)(struct nv50_context *, struct nv50_hw_query *, + boolean, union pipe_query_result *); +}; + struct nv50_hw_query { struct nv50_query base; + const struct nv50_hw_query_funcs *funcs; uint32_t *data; uint32_t sequence; struct nouveau_bo *bo; @@ -31,6 +42,11 @@ nv50_hw_query(struct nv50_query *q) struct nv50_query * nv50_hw_create_query(struct nv50_context *, unsigned, unsigned); +int +nv50_hw_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +bool +nv50_hw_query_allocate(struct nv50_context *, struct nv50_query *, int); void nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, struct nv50_query *, unsigned); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c new file mode 100644 index 00000000000..d1bccb94193 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c @@ -0,0 +1,207 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw_metric.h" +#include "nv50/nv50_query_hw_sm.h" + +/* === PERFORMANCE MONITORING METRICS for NV84+ === */ +static const char *nv50_hw_metric_names[] = +{ + "metric-branch_efficiency", +}; + +struct nv50_hw_metric_query_cfg { + uint32_t queries[4]; + uint32_t num_queries; +}; + +#define _SM(n) NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_ ##n) +#define _M(n, c) [NV50_HW_METRIC_QUERY_##n] = c + +/* ==== Compute capability 1.1 (G84+) ==== */ +static const struct nv50_hw_metric_query_cfg +sm11_branch_efficiency = +{ + .queries[0] = _SM(BRANCH), + .queries[1] = _SM(DIVERGENT_BRANCH), + .num_queries = 2, +}; + +static const struct nv50_hw_metric_query_cfg *sm11_hw_metric_queries[] = +{ + _M(BRANCH_EFFICIENCY, &sm11_branch_efficiency), +}; + +#undef _SM +#undef _M + +static const struct nv50_hw_metric_query_cfg * +nv50_hw_metric_query_get_cfg(struct nv50_context *nv50, + struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + return sm11_hw_metric_queries[q->type - NV50_HW_METRIC_QUERY(0)]; +} + +static void +nv50_hw_metric_destroy_query(struct nv50_context *nv50, + struct nv50_hw_query *hq) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) + hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]); + FREE(hmq); +} + +static boolean +nv50_hw_metric_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + boolean ret = false; + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) { + ret = hmq->queries[i]->funcs->begin_query(nv50, hmq->queries[i]); + if (!ret) + return ret; + } + return ret; +} + +static void +nv50_hw_metric_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) + hmq->queries[i]->funcs->end_query(nv50, hmq->queries[i]); +} + +static uint64_t +sm11_hw_metric_calc_result(struct nv50_hw_query *hq, uint64_t res64[8]) +{ + switch (hq->base.type - NV50_HW_METRIC_QUERY(0)) { + case NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY: + /* (branch / (branch + divergent_branch)) * 100 */ + if (res64[0] + res64[1]) + return (res64[0] / (double)(res64[0] + res64[1])) * 100; + break; + default: + debug_printf("invalid metric type: %d\n", + hq->base.type - NV50_HW_METRIC_QUERY(0)); + break; + } + return 0; +} + +static boolean +nv50_hw_metric_get_query_result(struct nv50_context *nv50, + struct nv50_hw_query *hq, boolean wait, + union pipe_query_result *result) +{ + struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq); + union pipe_query_result results[4] = {}; + uint64_t res64[4] = {}; + boolean ret = false; + unsigned i; + + for (i = 0; i < hmq->num_queries; i++) { + ret = hmq->queries[i]->funcs->get_query_result(nv50, hmq->queries[i], + wait, &results[i]); + if (!ret) + return ret; + res64[i] = *(uint64_t *)&results[i]; + } + + *(uint64_t *)result = sm11_hw_metric_calc_result(hq, res64); + return ret; +} + +static const struct nv50_hw_query_funcs hw_metric_query_funcs = { + .destroy_query = nv50_hw_metric_destroy_query, + .begin_query = nv50_hw_metric_begin_query, + .end_query = nv50_hw_metric_end_query, + .get_query_result = nv50_hw_metric_get_query_result, +}; + +struct nv50_hw_query * +nv50_hw_metric_create_query(struct nv50_context *nv50, unsigned type) +{ + const struct nv50_hw_metric_query_cfg *cfg; + struct nv50_hw_metric_query *hmq; + struct nv50_hw_query *hq; + unsigned i; + + if (type < NV50_HW_METRIC_QUERY(0) || type > NV50_HW_METRIC_QUERY_LAST) + return NULL; + + hmq = CALLOC_STRUCT(nv50_hw_metric_query); + if (!hmq) + return NULL; + + hq = &hmq->base; + hq->funcs = &hw_metric_query_funcs; + hq->base.type = type; + + cfg = nv50_hw_metric_query_get_cfg(nv50, hq); + + for (i = 0; i < cfg->num_queries; i++) { + hmq->queries[i] = nv50_hw_sm_create_query(nv50, cfg->queries[i]); + if (!hmq->queries[i]) { + nv50_hw_metric_destroy_query(nv50, hq); + return NULL; + } + hmq->num_queries++; + } + + return hq; +} + +int +nv50_hw_metric_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int count = 0; + + if (screen->compute) + if (screen->base.class_3d >= NV84_3D_CLASS) + count += NV50_HW_METRIC_QUERY_COUNT; + + if (!info) + return count; + + if (id < count) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = nv50_hw_metric_names[id]; + info->query_type = NV50_HW_METRIC_QUERY(id); + info->group_id = NV50_HW_METRIC_QUERY_GROUP; + return 1; + } + } + } + return 0; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h new file mode 100644 index 00000000000..f8cfc04084f --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h @@ -0,0 +1,34 @@ +#ifndef __NV50_QUERY_HW_METRIC_H__ +#define __NV50_QUERY_HW_METRIC_H__ + +#include "nv50_query_hw.h" + +struct nv50_hw_metric_query { + struct nv50_hw_query base; + struct nv50_hw_query *queries[4]; + unsigned num_queries; +}; + +static inline struct nv50_hw_metric_query * +nv50_hw_metric_query(struct nv50_hw_query *hq) +{ + return (struct nv50_hw_metric_query *)hq; +} + +/* + * Driver metrics queries: + */ +#define NV50_HW_METRIC_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i)) +#define NV50_HW_METRIC_QUERY_LAST NV50_HW_METRIC_QUERY(NV50_HW_METRIC_QUERY_COUNT - 1) +enum nv50_hw_metric_queries +{ + NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY = 0, + NV50_HW_METRIC_QUERY_COUNT +}; + +struct nv50_hw_query * +nv50_hw_metric_create_query(struct nv50_context *, unsigned); +int +nv50_hw_metric_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c new file mode 100644 index 00000000000..8453ce76095 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c @@ -0,0 +1,417 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define NV50_PUSH_EXPLICIT_SPACE_CHECKING + +#include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw_sm.h" + +#include "nv_object.xml.h" +#include "nv50/nv50_compute.xml.h" + +/* === PERFORMANCE MONITORING COUNTERS for NV84+ === */ + +/* NOTE: intentionally using the same names as NV */ +static const char *nv50_hw_sm_query_names[] = +{ + "branch", + "divergent_branch", + "instructions", + "prof_trigger_00", + "prof_trigger_01", + "prof_trigger_02", + "prof_trigger_03", + "prof_trigger_04", + "prof_trigger_05", + "prof_trigger_06", + "prof_trigger_07", + "sm_cta_launched", + "warp_serialize", +}; + +static const uint64_t nv50_read_hw_sm_counters_code[] = +{ + /* and b32 $r0 $r0 0x0000ffff + * add b32 $c0 $r0 $r0 $r0 + * (lg $c0) ret + * mov $r0 $pm0 + * mov $r1 $pm1 + * mov $r2 $pm2 + * mov $r3 $pm3 + * mov $r4 $physid + * ld $r5 b32 s[0x10] + * ld $r6 b32 s[0x14] + * and b32 $r4 $r4 0x000f0000 + * shr u32 $r4 $r4 0x10 + * mul $r4 u24 $r4 0x14 + * add b32 $r5 $r5 $r4 + * st b32 g15[$r5] $r0 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r1 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r2 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r3 + * add b32 $r5 $r5 0x04 + * exit st b32 g15[$r5] $r6 */ + 0x00000fffd03f0001ULL, + 0x040007c020000001ULL, + 0x0000028030000003ULL, + 0x6001078000000001ULL, + 0x6001478000000005ULL, + 0x6001878000000009ULL, + 0x6001c7800000000dULL, + 0x6000078000000011ULL, + 0x4400c78010000815ULL, + 0x4400c78010000a19ULL, + 0x0000f003d0000811ULL, + 0xe410078030100811ULL, + 0x0000000340540811ULL, + 0x0401078020000a15ULL, + 0xa0c00780d00f0a01ULL, + 0x0000000320048a15ULL, + 0xa0c00780d00f0a05ULL, + 0x0000000320048a15ULL, + 0xa0c00780d00f0a09ULL, + 0x0000000320048a15ULL, + 0xa0c00780d00f0a0dULL, + 0x0000000320048a15ULL, + 0xa0c00781d00f0a19ULL, +}; + +struct nv50_hw_sm_counter_cfg +{ + uint32_t mode : 4; /* LOGOP, LOGOP_PULSE */ + uint32_t unit : 8; /* UNK[0-5] */ + uint32_t sig : 8; /* signal selection */ +}; + +struct nv50_hw_sm_query_cfg +{ + struct nv50_hw_sm_counter_cfg ctr[4]; + uint8_t num_counters; +}; + +#define _Q(n, m, u, s) [NV50_HW_SM_QUERY_##n] = { { { NV50_COMPUTE_MP_PM_CONTROL_MODE_##m, NV50_COMPUTE_MP_PM_CONTROL_UNIT_##u, s, }, {}, {}, {} }, 1 } + +/* ==== Compute capability 1.1 (G84+) ==== */ +static const struct nv50_hw_sm_query_cfg sm11_hw_sm_queries[] = +{ + _Q(BRANCH, LOGOP, UNK4, 0x02), + _Q(DIVERGENT_BRANCH, LOGOP, UNK4, 0x09), + _Q(INSTRUCTIONS, LOGOP, UNK4, 0x04), + _Q(PROF_TRIGGER_0, LOGOP, UNK1, 0x26), + _Q(PROF_TRIGGER_1, LOGOP, UNK1, 0x27), + _Q(PROF_TRIGGER_2, LOGOP, UNK1, 0x28), + _Q(PROF_TRIGGER_3, LOGOP, UNK1, 0x29), + _Q(PROF_TRIGGER_4, LOGOP, UNK1, 0x2a), + _Q(PROF_TRIGGER_5, LOGOP, UNK1, 0x2b), + _Q(PROF_TRIGGER_6, LOGOP, UNK1, 0x2c), + _Q(PROF_TRIGGER_7, LOGOP, UNK1, 0x2d), + _Q(SM_CTA_LAUNCHED, LOGOP, UNK1, 0x33), + _Q(WARP_SERIALIZE, LOGOP, UNK0, 0x0b), +}; + +static inline uint16_t nv50_hw_sm_get_func(uint8_t slot) +{ + switch (slot) { + case 0: return 0xaaaa; + case 1: return 0xcccc; + case 2: return 0xf0f0; + case 3: return 0xff00; + } + return 0; +} + +static const struct nv50_hw_sm_query_cfg * +nv50_hw_sm_query_get_cfg(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + return &sm11_hw_sm_queries[q->type - NV50_HW_SM_QUERY(0)]; +} + +static void +nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + q->funcs->destroy_query(nv50, q); +} + +static boolean +nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + const struct nv50_hw_sm_query_cfg *cfg; + uint16_t func; + int i, c; + + cfg = nv50_hw_sm_query_get_cfg(nv50, hq); + + /* check if we have enough free counter slots */ + if (screen->pm.num_hw_sm_active + cfg->num_counters > 4) { + NOUVEAU_ERR("Not enough free MP counter slots !\n"); + return false; + } + + assert(cfg->num_counters <= 4); + PUSH_SPACE(push, 4 * 4); + + /* set sequence field to 0 (used to check if result is available) */ + for (i = 0; i < screen->MPsInTP; ++i) { + const unsigned b = (0x14 / 4) * i; + hq->data[b + 16] = 0; + } + hq->sequence++; + + for (i = 0; i < cfg->num_counters; i++) { + screen->pm.num_hw_sm_active++; + + /* find free counter slots */ + for (c = 0; c < 4; ++c) { + if (!screen->pm.mp_counter[c]) { + hsq->ctr[i] = c; + screen->pm.mp_counter[c] = hsq; + break; + } + } + + /* select func to aggregate counters */ + func = nv50_hw_sm_get_func(c); + + /* configure and reset the counter(s) */ + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) + | cfg->ctr[i].unit | cfg->ctr[i].mode); + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1); + PUSH_DATA (push, 0); + } + return true; +} + +static void +nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_screen *screen = nv50->screen; + struct pipe_context *pipe = &nv50->base.pipe; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + uint32_t mask; + uint32_t input[3]; + const uint block[3] = { 32, 1, 1 }; + const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 }; + int c; + + if (unlikely(!screen->pm.prog)) { + struct nv50_program *prog = CALLOC_STRUCT(nv50_program); + prog->type = PIPE_SHADER_COMPUTE; + prog->translated = true; + prog->max_gpr = 7; + prog->parm_size = 8; + prog->code = (uint32_t *)nv50_read_hw_sm_counters_code; + prog->code_size = sizeof(nv50_read_hw_sm_counters_code); + screen->pm.prog = prog; + } + + /* disable all counting */ + PUSH_SPACE(push, 8); + for (c = 0; c < 4; c++) { + if (screen->pm.mp_counter[c]) { + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + PUSH_DATA (push, 0); + } + } + + /* release counters for this query */ + for (c = 0; c < 4; c++) { + if (screen->pm.mp_counter[c] == hsq) { + screen->pm.num_hw_sm_active--; + screen->pm.mp_counter[c] = NULL; + } + } + + BCTX_REFN_bo(nv50->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR, + hq->bo); + + PUSH_SPACE(push, 2); + BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + + pipe->bind_compute_state(pipe, screen->pm.prog); + input[0] = hq->bo->offset + hq->base_offset; + input[1] = hq->sequence; + pipe->launch_grid(pipe, block, grid, 0, input); + + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY); + + /* re-active other counters */ + PUSH_SPACE(push, 8); + mask = 0; + for (c = 0; c < 4; c++) { + const struct nv50_hw_sm_query_cfg *cfg; + unsigned i; + + hsq = screen->pm.mp_counter[c]; + if (!hsq) + continue; + + cfg = nv50_hw_sm_query_get_cfg(nv50, &hsq->base); + for (i = 0; i < cfg->num_counters; i++) { + uint16_t func; + + if (mask & (1 << hsq->ctr[i])) + break; + + mask |= 1 << hsq->ctr[i]; + func = nv50_hw_sm_get_func(hsq->ctr[i]); + + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1); + PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) + | cfg->ctr[i].unit | cfg->ctr[i].mode); + } + } +} + +static inline bool +nv50_hw_sm_query_read_data(uint32_t count[32][4], + struct nv50_context *nv50, bool wait, + struct nv50_hw_query *hq, + const struct nv50_hw_sm_query_cfg *cfg, + unsigned mp_count) +{ + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + unsigned p, c; + + for (p = 0; p < mp_count; ++p) { + const unsigned b = (0x14 / 4) * p; + + for (c = 0; c < cfg->num_counters; ++c) { + if (hq->data[b + 4] != hq->sequence) { + if (!wait) + return false; + if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->base.client)) + return false; + } + count[p][c] = hq->data[b + hsq->ctr[c]]; + } + } + return true; +} + +static boolean +nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq, + boolean wait, union pipe_query_result *result) +{ + uint32_t count[32][4]; + uint64_t value = 0; + unsigned mp_count = MIN2(nv50->screen->MPsInTP, 32); + unsigned p, c; + const struct nv50_hw_sm_query_cfg *cfg; + bool ret; + + cfg = nv50_hw_sm_query_get_cfg(nv50, hq); + + ret = nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count); + if (!ret) + return false; + + for (c = 0; c < cfg->num_counters; ++c) + for (p = 0; p < mp_count; ++p) + value += count[p][c]; + + /* We only count a single TP, and simply multiply by the total number of + * TPs to compute result over all TPs. This is inaccurate, but enough! */ + value *= nv50->screen->TPs; + + *(uint64_t *)result = value; + return true; +} + +static const struct nv50_hw_query_funcs hw_sm_query_funcs = { + .destroy_query = nv50_hw_sm_destroy_query, + .begin_query = nv50_hw_sm_begin_query, + .end_query = nv50_hw_sm_end_query, + .get_query_result = nv50_hw_sm_get_query_result, +}; + +struct nv50_hw_query * +nv50_hw_sm_create_query(struct nv50_context *nv50, unsigned type) +{ + struct nv50_hw_sm_query *hsq; + struct nv50_hw_query *hq; + unsigned space; + + if (type < NV50_HW_SM_QUERY(0) || type > NV50_HW_SM_QUERY_LAST) + return NULL; + + hsq = CALLOC_STRUCT(nv50_hw_sm_query); + if (!hsq) + return NULL; + + hq = &hsq->base; + hq->funcs = &hw_sm_query_funcs; + hq->base.type = type; + + /* + * for each MP: + * [00] = MP.C0 + * [04] = MP.C1 + * [08] = MP.C2 + * [0c] = MP.C3 + * [10] = MP.sequence + */ + space = (4 + 1) * nv50->screen->MPsInTP * sizeof(uint32_t); + + if (!nv50_hw_query_allocate(nv50, &hq->base, space)) { + FREE(hq); + return NULL; + } + + return hq; +} + +int +nv50_hw_sm_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int count = 0; + + if (screen->compute) + if (screen->base.class_3d >= NV84_3D_CLASS) + count += NV50_HW_SM_QUERY_COUNT; + + if (!info) + return count; + + if (id < count) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = nv50_hw_sm_query_names[id]; + info->query_type = NV50_HW_SM_QUERY(id); + info->group_id = NV50_HW_SM_QUERY_GROUP; + return 1; + } + } + } + return 0; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h new file mode 100644 index 00000000000..c1a1cd175e3 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h @@ -0,0 +1,45 @@ +#ifndef __NV50_QUERY_HW_SM_H__ +#define __NV50_QUERY_HW_SM_H__ + +#include "nv50_query_hw.h" + +struct nv50_hw_sm_query { + struct nv50_hw_query base; + uint8_t ctr[4]; +}; + +static inline struct nv50_hw_sm_query * +nv50_hw_sm_query(struct nv50_hw_query *hq) +{ + return (struct nv50_hw_sm_query *)hq; +} + +/* + * Performance counter queries: + */ +#define NV50_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i)) +#define NV50_HW_SM_QUERY_LAST NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_COUNT - 1) +enum nv50_hw_sm_queries +{ + NV50_HW_SM_QUERY_BRANCH = 0, + NV50_HW_SM_QUERY_DIVERGENT_BRANCH, + NV50_HW_SM_QUERY_INSTRUCTIONS, + NV50_HW_SM_QUERY_PROF_TRIGGER_0, + NV50_HW_SM_QUERY_PROF_TRIGGER_1, + NV50_HW_SM_QUERY_PROF_TRIGGER_2, + NV50_HW_SM_QUERY_PROF_TRIGGER_3, + NV50_HW_SM_QUERY_PROF_TRIGGER_4, + NV50_HW_SM_QUERY_PROF_TRIGGER_5, + NV50_HW_SM_QUERY_PROF_TRIGGER_6, + NV50_HW_SM_QUERY_PROF_TRIGGER_7, + NV50_HW_SM_QUERY_SM_CTA_LAUNCHED, + NV50_HW_SM_QUERY_WARP_SERIALIZE, + NV50_HW_SM_QUERY_COUNT, +}; + +struct nv50_hw_query * +nv50_hw_sm_create_query(struct nv50_context *, unsigned); +int +nv50_hw_sm_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index f47e998ab1e..1e4b75f18e0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -41,8 +41,6 @@ #define THREADS_IN_WARP 32 -#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float)) - static boolean nv50_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, @@ -183,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_COMPUTE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ @@ -212,7 +211,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: - case PIPE_CAP_COMPUTE: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */ @@ -251,6 +249,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_FRAGMENT: + case PIPE_SHADER_COMPUTE: break; default: return 0; @@ -336,6 +335,52 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) return 0.0f; } +static int +nv50_screen_get_compute_param(struct pipe_screen *pscreen, + enum pipe_compute_cap param, void *data) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + +#define RET(x) do { \ + if (data) \ + memcpy(data, x, sizeof(x)); \ + return sizeof(x); \ +} while (0) + + switch (param) { + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + RET((uint64_t []) { 2 }); + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + RET(((uint64_t []) { 65535, 65535 })); + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + RET(((uint64_t []) { 512, 512, 64 })); + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + RET((uint64_t []) { 512 }); + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */ + RET((uint64_t []) { 1ULL << 32 }); + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ + RET((uint64_t []) { 16 << 10 }); + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ + RET((uint64_t []) { 16 << 10 }); + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ + RET((uint64_t []) { 4096 }); + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + RET((uint32_t []) { 32 }); + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + RET((uint64_t []) { 1ULL << 40 }); + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + RET((uint32_t []) { 0 }); + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint32_t []) { screen->mp_count }); + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */ + default: + return 0; + } + +#undef RET +} + static void nv50_screen_destroy(struct pipe_screen *pscreen) { @@ -377,6 +422,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->tesla); nouveau_object_del(&screen->eng2d); nouveau_object_del(&screen->m2mf); + nouveau_object_del(&screen->compute); nouveau_object_del(&screen->sync); nouveau_screen_fini(&screen->base); @@ -640,7 +686,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); PUSH_DATA (push, 0); if (screen->base.class_3d >= NV84_3D_CLASS) { - BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1); + BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1); PUSH_DATA (push, 0); } @@ -742,6 +788,9 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; + pscreen->get_compute_param = nv50_screen_get_compute_param; + pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; + pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info; nv50_screen_init_resource_functions(pscreen); @@ -851,6 +900,8 @@ nv50_screen_create(struct nouveau_device *dev) screen->TPs = util_bitcount(value & 0xffff); screen->MPsInTP = util_bitcount((value >> 24) & 0xf); + screen->mp_count = screen->TPs * screen->MPsInTP; + stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP * STACK_WARPS_ALLOC * 64 * 8; @@ -902,6 +953,12 @@ nv50_screen_create(struct nouveau_device *dev) nv50_screen_init_hwctx(screen); + ret = nv50_screen_compute_setup(screen, screen->base.pushbuf); + if (ret) { + NOUVEAU_ERR("Failed to init compute context: %d\n", ret); + goto fail; + } + nouveau_fence_new(&screen->base, &screen->base.fence.current, false); return pscreen; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index ce51f0fc254..2a4983d1020 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -23,6 +23,10 @@ struct nv50_context; #define NV50_MAX_VIEWPORTS 16 +#define NV50_MAX_GLOBALS 16 + +#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float)) + struct nv50_blitter; struct nv50_graph_state { @@ -66,6 +70,7 @@ struct nv50_screen { unsigned MPsInTP; unsigned max_tls_space; unsigned cur_tls_space; + unsigned mp_count; struct nouveau_heap *vp_code_heap; struct nouveau_heap *gp_code_heap; @@ -90,9 +95,16 @@ struct nv50_screen { struct nouveau_bo *bo; } fence; + struct { + struct nv50_program *prog; /* compute state object to read MP counters */ + struct nv50_hw_sm_query *mp_counter[4]; /* counter to query allocation */ + uint8_t num_hw_sm_active; + } pm; + struct nouveau_object *sync; struct nouveau_object *tesla; + struct nouveau_object *compute; struct nouveau_object *eng2d; struct nouveau_object *m2mf; }; @@ -103,12 +115,19 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_info *); +int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_group_info *); + bool nv50_blitter_create(struct nv50_screen *); void nv50_blitter_destroy(struct nv50_screen *); int nv50_screen_tic_alloc(struct nv50_screen *, void *); int nv50_screen_tsc_alloc(struct nv50_screen *, void *); +int nv50_screen_compute_setup(struct nv50_screen *, struct nouveau_pushbuf *); + static inline void nv50_resource_fence(struct nv04_resource *res, uint32_t flags) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index d27f12ca94b..b4ea08d4d13 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -792,6 +792,35 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) nv50->dirty |= NV50_NEW_GMTYPROG; } +static void * +nv50_cp_state_create(struct pipe_context *pipe, + const struct pipe_compute_state *cso) +{ + struct nv50_program *prog; + + prog = CALLOC_STRUCT(nv50_program); + if (!prog) + return NULL; + prog->type = PIPE_SHADER_COMPUTE; + + prog->cp.smem_size = cso->req_local_mem; + prog->cp.lmem_size = cso->req_private_mem; + prog->parm_size = cso->req_input_mem; + + prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog); + + return (void *)prog; +} + +static void +nv50_cp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->compprog = hwcso; + nv50->dirty_cp |= NV50_NEW_CP_PROGRAM; +} + static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct pipe_constant_buffer *cb) @@ -1134,6 +1163,70 @@ nv50_set_stream_output_targets(struct pipe_context *pipe, nv50->dirty |= NV50_NEW_STRMOUT; } +static void +nv50_set_compute_resources(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_surface **resources) +{ + /* TODO: bind surfaces */ +} + +static inline void +nv50_set_global_handle(uint32_t *phandle, struct pipe_resource *res) +{ + struct nv04_resource *buf = nv04_resource(res); + if (buf) { + uint64_t limit = (buf->address + buf->base.width0) - 1; + if (limit < (1ULL << 32)) { + *phandle = (uint32_t)buf->address; + } else { + NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: " + "resource not contained within 32-bit address space !\n"); + *phandle = 0; + } + } else { + *phandle = 0; + } +} + +static void +nv50_set_global_bindings(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_resource **resources, + uint32_t **handles) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct pipe_resource **ptr; + unsigned i; + const unsigned end = start + nr; + + if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource *))) { + const unsigned old_size = nv50->global_residents.size; + const unsigned req_size = end * sizeof(struct pipe_resource *); + util_dynarray_resize(&nv50->global_residents, req_size); + memset((uint8_t *)nv50->global_residents.data + old_size, 0, + req_size - old_size); + } + + if (resources) { + ptr = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) { + pipe_resource_reference(&ptr[i], resources[i]); + nv50_set_global_handle(handles[i], resources[i]); + } + } else { + ptr = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) + pipe_resource_reference(&ptr[i], NULL); + } + + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL); + + nv50->dirty_cp = NV50_NEW_CP_GLOBALS; +} + void nv50_init_state_functions(struct nv50_context *nv50) { @@ -1162,12 +1255,15 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->create_vs_state = nv50_vp_state_create; pipe->create_fs_state = nv50_fp_state_create; pipe->create_gs_state = nv50_gp_state_create; + pipe->create_compute_state = nv50_cp_state_create; pipe->bind_vs_state = nv50_vp_state_bind; pipe->bind_fs_state = nv50_fp_state_bind; pipe->bind_gs_state = nv50_gp_state_bind; + pipe->bind_compute_state = nv50_cp_state_bind; pipe->delete_vs_state = nv50_sp_state_delete; pipe->delete_fs_state = nv50_sp_state_delete; pipe->delete_gs_state = nv50_sp_state_delete; + pipe->delete_compute_state = nv50_sp_state_delete; pipe->set_blend_color = nv50_set_blend_color; pipe->set_stencil_ref = nv50_set_stencil_ref; @@ -1191,6 +1287,9 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->stream_output_target_destroy = nv50_so_target_destroy; pipe->set_stream_output_targets = nv50_set_stream_output_targets; + pipe->set_global_binding = nv50_set_global_bindings; + pipe->set_compute_resources = nv50_set_compute_resources; + nv50->sample_mask = ~0; nv50->min_samples = 1; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index b6181edf24f..02a759c23ad 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -503,8 +503,7 @@ static struct state_validate { { nv50_validate_samplers, NV50_NEW_SAMPLERS }, { nv50_stream_output_validate, NV50_NEW_STRMOUT | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS | - NV50_NEW_VERTPROG }, + { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }, { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES }, }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 916a7d44a31..8ba19d2cc90 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -339,12 +339,18 @@ nv50_clear_render_target(struct pipe_context *pipe, PUSH_DATA (push, (width << 16) | dstx); PUSH_DATA (push, (height << 16) | dsty); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, 0x3c | (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; } @@ -415,12 +421,18 @@ nv50_clear_depth_stencil(struct pipe_context *pipe, PUSH_DATA (push, (width << 16) | dstx); PUSH_DATA (push, (height << 16) | dsty); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, mode | (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; } @@ -673,6 +685,9 @@ nv50_clear_buffer(struct pipe_context *pipe, PUSH_DATA (push, (width << 16)); PUSH_DATA (push, (height << 16)); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); + BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); PUSH_DATA (push, 0x3c); @@ -690,6 +705,9 @@ nv50_clear_buffer(struct pipe_context *pipe, PUSH_DATA (push, 0x3c); } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 9aa593f919e..85878d5fcc7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -294,8 +294,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) uint64_t addrs[PIPE_MAX_ATTRIBS]; uint32_t limits[PIPE_MAX_ATTRIBS]; struct nouveau_pushbuf *push = nv50->base.pushbuf; - struct nv50_vertex_stateobj dummy = {}; - struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy; + struct nv50_vertex_stateobj *vertex = nv50->vertex; struct pipe_vertex_buffer *vb; struct nv50_vertex_element *ve; uint32_t mask; @@ -303,14 +302,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) unsigned i; const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts); - /* A vertexid is not generated for inline data uploads. Have to use a - * VBO. This check must come after the vertprog has been validated, - * otherwise vertexid may be unset. - */ - assert(nv50->vertprog->translated); - if (nv50->vertprog->vp.vertexid) - nv50->vbo_push_hint = 0; - if (unlikely(vertex->need_conversion)) nv50->vbo_fifo = ~0; else @@ -487,7 +478,7 @@ nv50_draw_arrays(struct nv50_context *nv50, BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); PUSH_DATA (push, 0); if (nv50->screen->base.class_3d >= NV84_3D_CLASS) { - BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1); + BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1); PUSH_DATA (push, 0); } nv50->state.index_bias = 0; @@ -613,7 +604,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten, BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); PUSH_DATA (push, index_bias); if (nv50->screen->base.class_3d >= NV84_3D_CLASS) { - BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1); + BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1); PUSH_DATA (push, index_bias); } nv50->state.index_bias = index_bias; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h index 76f1b41ea70..68002305d72 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h @@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_3D(m) 3, (m) #define NV50_3D(n) SUBC_3D(NV50_3D_##n) +#define NV84_3D(n) SUBC_3D(NV84_3D_##n) #define NVA0_3D(n) SUBC_3D(NVA0_3D_##n) #define SUBC_2D(m) 4, (m) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 82ed5a1864e..162661ff2a7 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -180,9 +180,10 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, int ref) { struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe); + unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER; unsigned s, i; - if (res->bind & PIPE_BIND_RENDER_TARGET) { + if (bind & PIPE_BIND_RENDER_TARGET) { for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) { if (nvc0->framebuffer.cbufs[i] && nvc0->framebuffer.cbufs[i]->texture == res) { @@ -193,7 +194,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } } - if (res->bind & PIPE_BIND_DEPTH_STENCIL) { + if (bind & PIPE_BIND_DEPTH_STENCIL) { if (nvc0->framebuffer.zsbuf && nvc0->framebuffer.zsbuf->texture == res) { nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; @@ -203,12 +204,12 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } - if (res->bind & (PIPE_BIND_VERTEX_BUFFER | - PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_CONSTANT_BUFFER | - PIPE_BIND_STREAM_OUTPUT | - PIPE_BIND_COMMAND_ARGS_BUFFER | - PIPE_BIND_SAMPLER_VIEW)) { + if (bind & (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER | + PIPE_BIND_STREAM_OUTPUT | + PIPE_BIND_COMMAND_ARGS_BUFFER | + PIPE_BIND_SAMPLER_VIEW)) { for (i = 0; i < nvc0->num_vtxbufs; ++i) { if (nvc0->vtxbuf[i].buffer == res) { nvc0->dirty |= NVC0_NEW_ARRAYS; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index f53921092a5..d992b10a23c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, info->max_value.u64 = 0; info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; info->group_id = -1; + info->flags = 0; #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS if (id < num_sw_queries) @@ -200,7 +201,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, if (id == NVC0_HW_SM_QUERY_GROUP) { if (screen->compute) { info->name = "MP counters"; - info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; /* Because we can't expose the number of hardware counters needed for * each different query, we don't want to allow more than one active @@ -224,7 +224,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, if (screen->compute) { if (screen->base.class_3d < NVE4_3D_CLASS) { info->name = "Performance metrics"; - info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; info->max_active_queries = 1; info->num_queries = NVC0_HW_METRIC_QUERY_COUNT; return 1; @@ -234,7 +233,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) { info->name = "Driver statistics"; - info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU; info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT; info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT; return 1; @@ -245,7 +243,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, info->name = "this_is_not_the_query_group_you_are_looking_for"; info->max_active_queries = 0; info->num_queries = 0; - info->type = 0; return 0; } @@ -260,4 +257,5 @@ nvc0_init_query_functions(struct nvc0_context *nvc0) pipe->end_query = nvc0_end_query; pipe->get_query_result = nvc0_get_query_result; pipe->render_condition = nvc0_render_condition; + nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 44b222e5134..7962143d45a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -1014,14 +1014,15 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program); prog->type = PIPE_SHADER_COMPUTE; prog->translated = true; - prog->num_gprs = 14; prog->parm_size = 12; if (is_nve4) { prog->code = (uint32_t *)nve4_read_hw_sm_counters_code; prog->code_size = sizeof(nve4_read_hw_sm_counters_code); + prog->num_gprs = 14; } else { prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code; prog->code_size = sizeof(nvc0_read_hw_sm_counters_code); + prog->num_gprs = 12; } screen->pm.prog = prog; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index cdb1fc1145f..6a4ae5be2ab 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -341,12 +341,16 @@ nvc0_clear_render_target(struct pipe_context *pipe, nvc0_resource_fence(res, NOUVEAU_BO_WR); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, 0x3c | (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } @@ -470,6 +474,8 @@ nvc0_clear_buffer(struct pipe_context *pipe, IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0); IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0); + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c); if (width * height != elements) { @@ -486,6 +492,8 @@ nvc0_clear_buffer(struct pipe_context *pipe, IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; @@ -545,12 +553,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, PUSH_DATA (push, dst->u.tex.first_layer); IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode); + IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { PUSH_DATA (push, mode | (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); } + IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } |