diff options
-rw-r--r-- | src/gallium/drivers/nouveau/Makefile.sources | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_compute.c | 320 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h | 444 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_context.c | 30 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_context.h | 23 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_program.c | 22 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_program.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_screen.c | 61 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_screen.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv50/nv50_state.c | 99 |
10 files changed, 1006 insertions, 9 deletions
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 83f81135590..c2ff8e9b46e 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -64,6 +64,7 @@ NV50_C_SOURCES := \ nv50/nv50_3ddefs.xml.h \ nv50/nv50_3d.xml.h \ nv50/nv50_blit.h \ + nv50/nv50_compute.c \ nv50/nv50_context.c \ nv50/nv50_context.h \ nv50/nv50_defs.xml.h \ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c new file mode 100644 index 00000000000..6d23fd66945 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -0,0 +1,320 @@ +/* + * Copyright 2012 Francisco Jerez + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "nv50/nv50_context.h" +#include "nv50/nv50_compute.xml.h" + +#include "codegen/nv50_ir_driver.h" + +int +nv50_screen_compute_setup(struct nv50_screen *screen, + struct nouveau_pushbuf *push) +{ + struct nouveau_device *dev = screen->base.device; + struct nouveau_object *chan = screen->base.channel; + struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data; + unsigned obj_class; + int i, ret; + + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + obj_class = NV50_COMPUTE_CLASS; + break; + case 0xa0: + switch (dev->chipset) { + case 0xa3: + case 0xa5: + case 0xa8: + obj_class = NVA3_COMPUTE_CLASS; + break; + default: + obj_class = NV50_COMPUTE_CLASS; + break; + } + break; + default: + NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); + return -1; + } + + ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0, + &screen->compute); + if (ret) + return ret; + + BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->compute->handle); + + BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->stack_bo->offset); + PUSH_DATA (push, screen->stack_bo->offset); + BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1); + PUSH_DATA (push, 4); + + BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1); + PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED); + BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1); + PUSH_DATA (push, 0x100); + BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1); + PUSH_DATA (push, fifo->vram); + + for (i = 0; i < 15; i++) { + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + } + + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1); + PUSH_DATA (push, ~0); + BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1); + PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); + + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1); + PUSH_DATA (push, 7); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1); + PUSH_DATA (push, 7); + BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1); + PUSH_DATA (push, 0x54); + BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1); + PUSH_DATA (push, 0); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1); + PUSH_DATA (push, fifo->vram); + + BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1); + PUSH_DATA (push, fifo->vram); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->tls_bo->offset + 65536); + PUSH_DATA (push, screen->tls_bo->offset + 65536); + BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1); + PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); + + return 0; +} + +static bool +nv50_compute_validate_program(struct nv50_context *nv50) +{ + struct nv50_program *prog = nv50->compprog; + + if (prog->mem) + return true; + + if (!prog->translated) { + prog->translated = nv50_program_translate( + prog, nv50->screen->base.device->chipset, &nv50->base.debug); + if (!prog->translated) + return false; + } + if (unlikely(!prog->code_size)) + return false; + + if (likely(prog->code_size)) { + if (nv50_program_upload_code(nv50, prog)) { + struct nouveau_pushbuf *push = nv50->base.pushbuf; + BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1); + PUSH_DATA (push, 0); + return true; + } + } + return false; +} + +static void +nv50_compute_validate_globals(struct nv50_context *nv50) +{ + unsigned i; + + for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource *res = *util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, i); + if (res) + nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL, + nv04_resource(res), NOUVEAU_BO_RDWR); + } +} + +static bool +nv50_compute_state_validate(struct nv50_context *nv50) +{ + if (!nv50_compute_validate_program(nv50)) + return false; + + if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS) + nv50_compute_validate_globals(nv50); + + /* TODO: validate textures, samplers, surfaces */ + + nv50_bufctx_fence(nv50->bufctx_cp, false); + + nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp); + if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf))) + return false; + if (unlikely(nv50->state.flushed)) + nv50_bufctx_fence(nv50->bufctx_cp, true); + + return true; +} + +static void +nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_pushbuf *push = screen->base.pushbuf; + unsigned size = align(nv50->compprog->parm_size, 0x4); + + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + PUSH_DATA (push, (size / 4) << 8); + + if (size) { + struct nouveau_mm_allocation *mm; + struct nouveau_bo *bo = NULL; + unsigned offset; + + mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset); + assert(mm); + + nouveau_bo_map(bo, 0, screen->base.client); + memcpy(bo->map + offset, input, size); + + nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + nouveau_pushbuf_bufctx(push, nv50->bufctx); + nouveau_pushbuf_validate(push); + + BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4); + nouveau_pushbuf_data(push, bo, offset, size); + + nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm); + nouveau_bo_ref(NULL, &bo); + nouveau_bufctx_reset(nv50->bufctx, 0); + } +} + +static uint32_t +nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label) +{ + struct nv50_program *prog = nv50->compprog; + const struct nv50_ir_prog_symbol *syms = + (const struct nv50_ir_prog_symbol *)prog->cp.syms; + unsigned i; + + for (i = 0; i < prog->cp.num_syms; ++i) { + if (syms[i].label == label) + return prog->code_base + syms[i].offset; + } + return prog->code_base; /* no symbols or symbol not found */ +} + +void +nv50_launch_grid(struct pipe_context *pipe, + const uint *block_layout, const uint *grid_layout, + uint32_t label, const void *input) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2]; + struct nv50_program *cp = nv50->compprog; + bool ret; + + ret = !nv50_compute_state_validate(nv50); + if (ret) { + NOUVEAU_ERR("Failed to launch grid !\n"); + return; + } + + nv50_compute_upload_input(nv50, input); + + BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1); + PUSH_DATA (push, nv50_compute_find_symbol(nv50, label)); + + BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1); + PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40)); + BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1); + PUSH_DATA (push, cp->max_gpr); + + /* grid/block setup */ + BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2); + PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]); + PUSH_DATA (push, block_layout[2]); + BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1); + PUSH_DATA (push, 1 << 16 | block_size); + BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1); + PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]); + BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1); + PUSH_DATA (push, 1); + + /* kernel launching */ + BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + + /* bind a compute shader clobbers fragment shader state */ + nv50->dirty |= NV50_NEW_FRAGPROG; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h new file mode 100644 index 00000000000..268d11253b6 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h @@ -0,0 +1,444 @@ +#ifndef NV50_COMPUTE_XML +#define NV50_COMPUTE_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/envytools/envytools/ +git clone https://github.com/envytools/envytools.git + +The rules-ng-ng source files this header was generated from are: +- rnndb/graph/g80_compute.xml ( 14027 bytes, from 2015-02-14 02:01:36) +- rnndb/copyright.xml ( 6456 bytes, from 2015-02-14 02:01:36) +- rnndb/nvchipsets.xml ( 2833 bytes, from 2015-04-28 16:28:33) +- rnndb/fifo/nv_object.xml ( 15390 bytes, from 2015-04-22 20:36:09) +- rnndb/g80_defs.xml ( 18210 bytes, from 2015-10-19 20:49:59) + +Copyright (C) 2006-2015 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- Ilia Mirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin KoĆcielnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + + +#define NV50_COMPUTE_DMA_NOTIFY 0x00000180 + +#define NV50_COMPUTE_DMA_GLOBAL 0x000001a0 + +#define NV50_COMPUTE_DMA_QUERY 0x000001a4 + +#define NV50_COMPUTE_DMA_LOCAL 0x000001b8 + +#define NV50_COMPUTE_DMA_STACK 0x000001bc + +#define NV50_COMPUTE_DMA_CODE_CB 0x000001c0 + +#define NV50_COMPUTE_DMA_TSC 0x000001c4 + +#define NV50_COMPUTE_DMA_TIC 0x000001c8 + +#define NV50_COMPUTE_DMA_TEXTURE 0x000001cc + +#define NV50_COMPUTE_UNK0200 0x00000200 +#define NV50_COMPUTE_UNK0200_UNK1__MASK 0x0000ffff +#define NV50_COMPUTE_UNK0200_UNK1__SHIFT 0 +#define NV50_COMPUTE_UNK0200_UNK2__MASK 0x00ff0000 +#define NV50_COMPUTE_UNK0200_UNK2__SHIFT 16 + +#define NV50_COMPUTE_UNK0204 0x00000204 + +#define NV50_COMPUTE_UNK0208 0x00000208 + +#define NV50_COMPUTE_UNK020C 0x0000020c + +#define NV50_COMPUTE_CP_ADDRESS_HIGH 0x00000210 + +#define NV50_COMPUTE_CP_ADDRESS_LOW 0x00000214 + +#define NV50_COMPUTE_STACK_ADDRESS_HIGH 0x00000218 + +#define NV50_COMPUTE_STACK_ADDRESS_LOW 0x0000021c + +#define NV50_COMPUTE_STACK_SIZE_LOG 0x00000220 + +#define NV50_COMPUTE_CALL_LIMIT_LOG 0x00000224 + +#define NV50_COMPUTE_UNK0228 0x00000228 +#define NV50_COMPUTE_UNK0228_UNK0 0x00000001 +#define NV50_COMPUTE_UNK0228_UNK4__MASK 0x00000ff0 +#define NV50_COMPUTE_UNK0228_UNK4__SHIFT 4 +#define NV50_COMPUTE_UNK0228_UNK12__MASK 0x000ff000 +#define NV50_COMPUTE_UNK0228_UNK12__SHIFT 12 + +#define NV50_COMPUTE_TSC_ADDRESS_HIGH 0x0000022c + +#define NV50_COMPUTE_TSC_ADDRESS_LOW 0x00000230 +#define NV50_COMPUTE_TSC_ADDRESS_LOW__ALIGN 0x00000020 + +#define NV50_COMPUTE_TSC_LIMIT 0x00000234 +#define NV50_COMPUTE_TSC_LIMIT__MAX 0x00001fff + +#define NV50_COMPUTE_CB_ADDR 0x00000238 +#define NV50_COMPUTE_CB_ADDR_ID__MASK 0x003fff00 +#define NV50_COMPUTE_CB_ADDR_ID__SHIFT 8 +#define NV50_COMPUTE_CB_ADDR_BUFFER__MASK 0x0000007f +#define NV50_COMPUTE_CB_ADDR_BUFFER__SHIFT 0 + +#define NV50_COMPUTE_CB_DATA(i0) (0x0000023c + 0x4*(i0)) +#define NV50_COMPUTE_CB_DATA__ESIZE 0x00000004 +#define NV50_COMPUTE_CB_DATA__LEN 0x00000010 + +#define NV50_COMPUTE_TSC_FLUSH 0x0000027c +#define NV50_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001 +#define NV50_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_COMPUTE_TIC_FLUSH 0x00000280 +#define NV50_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001 +#define NV50_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NV50_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NV50_COMPUTE_DELAY1 0x00000284 + +#define NV50_COMPUTE_WATCHDOG_TIMER 0x00000288 + +#define NV50_COMPUTE_DELAY2 0x0000028c + +#define NV50_COMPUTE_UNK0290 0x00000290 + +#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH 0x00000294 + +#define NV50_COMPUTE_LOCAL_ADDRESS_LOW 0x00000298 +#define NV50_COMPUTE_LOCAL_ADDRESS_LOW__ALIGN 0x00000100 + +#define NV50_COMPUTE_LOCAL_SIZE_LOG 0x0000029c + +#define NV50_COMPUTE_UNK02A0 0x000002a0 + +#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH 0x000002a4 + +#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW 0x000002a8 + +#define NV50_COMPUTE_CB_DEF_SET 0x000002ac +#define NV50_COMPUTE_CB_DEF_SET_SIZE__MASK 0x0000ffff +#define NV50_COMPUTE_CB_DEF_SET_SIZE__SHIFT 0 +#define NV50_COMPUTE_CB_DEF_SET_BUFFER__MASK 0x007f0000 +#define NV50_COMPUTE_CB_DEF_SET_BUFFER__SHIFT 16 + +#define NV50_COMPUTE_UNK02B0 0x000002b0 + +#define NV50_COMPUTE_BLOCK_ALLOC 0x000002b4 +#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__MASK 0x0000ffff +#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__SHIFT 0 +#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__MASK 0x00ff0000 +#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__SHIFT 16 + +#define NV50_COMPUTE_LANES32_ENABLE 0x000002b8 + +#define NV50_COMPUTE_UNK02BC 0x000002bc +#define NV50_COMPUTE_UNK02BC_UNK1__MASK 0x00000007 +#define NV50_COMPUTE_UNK02BC_UNK1__SHIFT 0 +#define NV50_COMPUTE_UNK02BC_UNK2__MASK 0x00000070 +#define NV50_COMPUTE_UNK02BC_UNK2__SHIFT 4 + +#define NV50_COMPUTE_CP_REG_ALLOC_TEMP 0x000002c0 + +#define NV50_COMPUTE_TIC_ADDRESS_HIGH 0x000002c4 + +#define NV50_COMPUTE_TIC_ADDRESS_LOW 0x000002c8 + +#define NV50_COMPUTE_TIC_LIMIT 0x000002cc + +#define NV50_COMPUTE_MP_PM_SET(i0) (0x000002d0 + 0x4*(i0)) +#define NV50_COMPUTE_MP_PM_SET__ESIZE 0x00000004 +#define NV50_COMPUTE_MP_PM_SET__LEN 0x00000004 + +#define NV50_COMPUTE_MP_PM_CONTROL(i0) (0x000002e0 + 0x4*(i0)) +#define NV50_COMPUTE_MP_PM_CONTROL__ESIZE 0x00000004 +#define NV50_COMPUTE_MP_PM_CONTROL__LEN 0x00000004 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE__MASK 0x00000001 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE__SHIFT 0 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP 0x00000000 +#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP_PULSE 0x00000001 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__MASK 0x00000070 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__SHIFT 4 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK0 0x00000000 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK1 0x00000010 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK2 0x00000020 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK3 0x00000030 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK4 0x00000040 +#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK5 0x00000050 +#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__MASK 0x00ffff00 +#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__SHIFT 8 +#define NV50_COMPUTE_MP_PM_CONTROL_SIG__MASK 0xff000000 +#define NV50_COMPUTE_MP_PM_CONTROL_SIG__SHIFT 24 + +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE 0x000002f0 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_0 0x00000001 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_1 0x00000002 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_2 0x00000004 +#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_3 0x00000008 + +#define NV50_COMPUTE_UNK02F4 0x000002f4 + +#define NV50_COMPUTE_BLOCKDIM_LATCH 0x000002f8 + +#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC 0x000002fc + +#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP 0x00000300 + +#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC 0x00000304 + +#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP 0x00000308 + +#define NV50_COMPUTE_UNK030C 0x0000030c + +#define NV50_COMPUTE_QUERY_ADDRESS_HIGH 0x00000310 + +#define NV50_COMPUTE_QUERY_ADDRESS_LOW 0x00000314 + +#define NV50_COMPUTE_QUERY_SEQUENCE 0x00000318 + +#define NV50_COMPUTE_QUERY_GET 0x0000031c +#define NV50_COMPUTE_QUERY_GET_INTR 0x00000200 +#define NV50_COMPUTE_QUERY_GET_SHORT 0x00008000 + +#define NV50_COMPUTE_COND_ADDRESS_HIGH 0x00000320 + +#define NV50_COMPUTE_COND_ADDRESS_LOW 0x00000324 + +#define NV50_COMPUTE_COND_MODE 0x00000328 +#define NV50_COMPUTE_COND_MODE_NEVER 0x00000000 +#define NV50_COMPUTE_COND_MODE_ALWAYS 0x00000001 +#define NV50_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002 +#define NV50_COMPUTE_COND_MODE_EQUAL 0x00000003 +#define NV50_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004 + +#define NV50_COMPUTE_UNK032C 0x0000032c + +#define NV50_COMPUTE_UNK0330 0x00000330 + +#define NV50_COMPUTE_UNK0334(i0) (0x00000334 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0334__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0334__LEN 0x00000003 + +#define NV50_COMPUTE_UNK0340(i0) (0x00000340 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0340__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0340__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0348(i0) (0x00000348 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0348__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0348__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0350(i0) (0x00000350 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0350__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0350__LEN 0x00000002 + +#define NV50_COMPUTE_UNK0358 0x00000358 + +#define NV50_COMPUTE_UNK035C 0x0000035c + +#define NV50_COMPUTE_UNK0360 0x00000360 +#define NV50_COMPUTE_UNK0360_UNK0__MASK 0x000000f0 +#define NV50_COMPUTE_UNK0360_UNK0__SHIFT 4 +#define NV50_COMPUTE_UNK0360_UNK1__MASK 0x00000f00 +#define NV50_COMPUTE_UNK0360_UNK1__SHIFT 8 + +#define NV50_COMPUTE_UNK0364 0x00000364 + +#define NV50_COMPUTE_LAUNCH 0x00000368 + +#define NV50_COMPUTE_UNK036C 0x0000036c + +#define NV50_COMPUTE_UNK0370 0x00000370 + +#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374 +#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__MASK 0x000000ff +#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__SHIFT 0 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MASK 0x0000ff00 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__SHIFT 8 +#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MAX 0x00000040 + +#define NV50_COMPUTE_LINKED_TSC 0x00000378 + +#define NV50_COMPUTE_UNK037C 0x0000037c +#define NV50_COMPUTE_UNK037C_ALWAYS_DERIV 0x00000001 +#define NV50_COMPUTE_UNK037C_UNK16 0x00010000 + +#define NV50_COMPUTE_CODE_CB_FLUSH 0x00000380 + +#define NV50_COMPUTE_UNK0384 0x00000384 + +#define NV50_COMPUTE_GRIDID 0x00000388 + +#define NV50_COMPUTE_UNK038C(i0) (0x0000038c + 0x4*(i0)) +#define NV50_COMPUTE_UNK038C__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK038C__LEN 0x00000003 + +#define NV50_COMPUTE_WRCACHE_FLUSH 0x00000398 + +#define NV50_COMPUTE_UNK039C(i0) (0x0000039c + 0x4*(i0)) +#define NV50_COMPUTE_UNK039C__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK039C__LEN 0x00000002 + +#define NV50_COMPUTE_GRIDDIM 0x000003a4 +#define NV50_COMPUTE_GRIDDIM_X__MASK 0x0000ffff +#define NV50_COMPUTE_GRIDDIM_X__SHIFT 0 +#define NV50_COMPUTE_GRIDDIM_Y__MASK 0xffff0000 +#define NV50_COMPUTE_GRIDDIM_Y__SHIFT 16 + +#define NV50_COMPUTE_SHARED_SIZE 0x000003a8 +#define NV50_COMPUTE_SHARED_SIZE__MAX 0x00004000 +#define NV50_COMPUTE_SHARED_SIZE__ALIGN 0x00000040 + +#define NV50_COMPUTE_BLOCKDIM_XY 0x000003ac +#define NV50_COMPUTE_BLOCKDIM_XY_X__MASK 0x0000ffff +#define NV50_COMPUTE_BLOCKDIM_XY_X__SHIFT 0 +#define NV50_COMPUTE_BLOCKDIM_XY_Y__MASK 0xffff0000 +#define NV50_COMPUTE_BLOCKDIM_XY_Y__SHIFT 16 + +#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0 +#define NV50_COMPUTE_BLOCKDIM_Z__MIN 0x00000001 +#define NV50_COMPUTE_BLOCKDIM_Z__MAX 0x00000040 + +#define NV50_COMPUTE_CP_START_ID 0x000003b4 + +#define NV50_COMPUTE_REG_MODE 0x000003b8 +#define NV50_COMPUTE_REG_MODE_PACKED 0x00000001 +#define NV50_COMPUTE_REG_MODE_STRIPED 0x00000002 + +#define NV50_COMPUTE_TEX_LIMITS 0x000003bc +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0 +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000 +#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000 +#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007 + +#define NV50_COMPUTE_BIND_TSC 0x000003c0 +#define NV50_COMPUTE_BIND_TSC_VALID 0x00000001 +#define NV50_COMPUTE_BIND_TSC_SAMPLER__MASK 0x000000f0 +#define NV50_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4 +#define NV50_COMPUTE_BIND_TSC_TSC__MASK 0x001ff000 +#define NV50_COMPUTE_BIND_TSC_TSC__SHIFT 12 + +#define NV50_COMPUTE_BIND_TIC 0x000003c4 +#define NV50_COMPUTE_BIND_TIC_VALID 0x00000001 +#define NV50_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NV50_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1 +#define NV50_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NV50_COMPUTE_BIND_TIC_TIC__SHIFT 9 + +#define NV50_COMPUTE_SET_PROGRAM_CB 0x000003c8 +#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__MASK 0x00000f00 +#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__SHIFT 8 +#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000 +#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__SHIFT 12 +#define NV50_COMPUTE_SET_PROGRAM_CB_VALID 0x000000ff + +#define NV50_COMPUTE_UNK03CC 0x000003cc + +#define NV50_COMPUTE_TEX_CACHE_CTL 0x000003d0 +#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__MASK 0x00000030 +#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__SHIFT 4 + +#define NV50_COMPUTE_UNK03D4 0x000003d4 + +#define NV50_COMPUTE_UNK03D8 0x000003d8 + +#define NV50_COMPUTE_UNK03DC 0x000003dc + +#define NV50_COMPUTE_UNK03E0 0x000003e0 + +#define NV50_COMPUTE_UNK03E4 0x000003e4 + +#define NVA3_COMPUTE_TEX_MISC 0x000003e8 +#define NVA3_COMPUTE_TEX_MISC_UNK1 0x00000001 +#define NVA3_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002 + +#define NV50_COMPUTE_GLOBAL(i0) (0x00000400 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL__ESIZE 0x00000020 +#define NV50_COMPUTE_GLOBAL__LEN 0x00000010 + +#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(i0) (0x00000400 + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(i0) (0x00000404 + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_PITCH(i0) (0x00000408 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL_PITCH__MAX 0x00800000 +#define NV50_COMPUTE_GLOBAL_PITCH__ALIGN 0x00000100 + +#define NV50_COMPUTE_GLOBAL_LIMIT(i0) (0x0000040c + 0x20*(i0)) + +#define NV50_COMPUTE_GLOBAL_MODE(i0) (0x00000410 + 0x20*(i0)) +#define NV50_COMPUTE_GLOBAL_MODE_LINEAR 0x00000001 +#define NV50_COMPUTE_GLOBAL_MODE_UNK1__MASK 0x000000f0 +#define NV50_COMPUTE_GLOBAL_MODE_UNK1__SHIFT 4 +#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__MASK 0x00000f00 +#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__SHIFT 8 + +#define NV50_COMPUTE_USER_PARAM(i0) (0x00000600 + 0x4*(i0)) +#define NV50_COMPUTE_USER_PARAM__ESIZE 0x00000004 +#define NV50_COMPUTE_USER_PARAM__LEN 0x00000040 + +#define NV50_COMPUTE_UNK0700(i0) (0x00000700 + 0x4*(i0)) +#define NV50_COMPUTE_UNK0700__ESIZE 0x00000004 +#define NV50_COMPUTE_UNK0700__LEN 0x00000010 + + +#endif /* NV50_COMPUTE_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index 7867c2df7f3..f645a4d4e6b 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -113,6 +113,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50) nouveau_bufctx_del(&nv50->bufctx_3d); nouveau_bufctx_del(&nv50->bufctx); + nouveau_bufctx_del(&nv50->bufctx_cp); util_unreference_framebuffer_state(&nv50->framebuffer); @@ -131,6 +132,14 @@ nv50_context_unreference_resources(struct nv50_context *nv50) if (!nv50->constbuf[s][i].user) pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL); } + + for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource **res = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, i); + pipe_resource_reference(res, NULL); + } + util_dynarray_fini(&nv50->global_residents); } static void @@ -263,10 +272,13 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) nv50->base.pushbuf = screen->base.pushbuf; nv50->base.client = screen->base.client; - ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT, - &nv50->bufctx_3d); + ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx); + if (!ret) + ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_3D_COUNT, + &nv50->bufctx_3d); if (!ret) - ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx); + ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_CP_COUNT, + &nv50->bufctx_cp); if (ret) goto out_err; @@ -290,6 +302,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) pipe->draw_vbo = nv50_draw_vbo; pipe->clear = nv50_clear; + pipe->launch_grid = nv50_launch_grid; pipe->flush = nv50_flush; pipe->texture_barrier = nv50_texture_barrier; @@ -335,19 +348,30 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms); BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc); BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo); + if (screen->compute) { + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code); + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc); + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo); + } flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo); BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo); + if (screen->compute) + BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); nv50->base.scratch.bo_size = 2 << 20; + util_dynarray_init(&nv50->global_residents); + return pipe; out_err: if (nv50->bufctx_3d) nouveau_bufctx_del(&nv50->bufctx_3d); + if (nv50->bufctx_cp) + nouveau_bufctx_del(&nv50->bufctx_cp); if (nv50->bufctx) nouveau_bufctx_del(&nv50->bufctx); FREE(nv50->blit); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index fb74a9748a3..fbafe029948 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -49,6 +49,10 @@ #define NV50_NEW_MIN_SAMPLES (1 << 22) #define NV50_NEW_CONTEXT (1 << 31) +#define NV50_NEW_CP_PROGRAM (1 << 0) +#define NV50_NEW_CP_GLOBALS (1 << 1) + +/* 3d bufctx (during draw_vbo, blit_3d) */ #define NV50_BIND_FB 0 #define NV50_BIND_VERTEX 1 #define NV50_BIND_VERTEX_TMP 2 @@ -58,7 +62,14 @@ #define NV50_BIND_SO 53 #define NV50_BIND_SCREEN 54 #define NV50_BIND_TLS 55 -#define NV50_BIND_COUNT 56 +#define NV50_BIND_3D_COUNT 56 + +/* compute bufctx (during launch_grid) */ +#define NV50_BIND_CP_GLOBAL 0 +#define NV50_BIND_CP_SCREEN 1 +#define NV50_BIND_CP_COUNT 2 + +/* bufctx for other operations */ #define NV50_BIND_2D 0 #define NV50_BIND_M2MF 0 #define NV50_BIND_FENCE 1 @@ -101,8 +112,10 @@ struct nv50_context { struct nouveau_bufctx *bufctx_3d; struct nouveau_bufctx *bufctx; + struct nouveau_bufctx *bufctx_cp; uint32_t dirty; + uint32_t dirty_cp; /* dirty flags for compute state */ bool cb_dirty; struct nv50_graph_state state; @@ -115,6 +128,7 @@ struct nv50_context { struct nv50_program *vertprog; struct nv50_program *gmtyprog; struct nv50_program *fragprog; + struct nv50_program *compprog; struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; uint16_t constbuf_dirty[3]; @@ -163,6 +177,8 @@ struct nv50_context { uint32_t cond_condmode; /* the calculated condition */ struct nv50_blitctx *blit; + + struct util_dynarray global_residents; }; static inline struct nv50_context * @@ -302,4 +318,9 @@ struct pipe_video_buffer * nv98_video_buffer_create(struct pipe_context *pipe, const struct pipe_video_buffer *template); +/* nv50_compute.c */ +void +nv50_launch_grid(struct pipe_context *, const uint *, const uint *, + uint32_t, const void *); + #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 707bf7a8ae3..48057d20f4e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -259,6 +259,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) return nv50_vertprog_assign_slots(info); case PIPE_SHADER_FRAGMENT: return nv50_fragprog_assign_slots(info); + case PIPE_SHADER_COMPUTE: + return 0; default: return -1; } @@ -355,6 +357,9 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, prog->gp.has_layer = 0; prog->gp.has_viewport = 0; + if (prog->type == PIPE_SHADER_COMPUTE) + info->prop.cp.inputOffset = 0x10; + info->driverPriv = prog; #ifdef DEBUG @@ -401,6 +406,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, break; } prog->gp.vert_count = info->prop.gp.maxVertices; + } else + if (prog->type == PIPE_SHADER_COMPUTE) { + prog->cp.syms = info->bin.syms; + prog->cp.num_syms = info->bin.numSyms; } if (prog->pipe.stream_output.num_outputs) @@ -423,11 +432,13 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) struct nouveau_heap *heap; int ret; uint32_t size = align(prog->code_size, 0x40); + uint8_t prog_type; switch (prog->type) { case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break; case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break; + case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break; default: assert(!"invalid program type"); return false; @@ -450,7 +461,14 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) return false; } } - prog->code_base = prog->mem->start; + + if (prog->type == PIPE_SHADER_COMPUTE) { + /* CP code must be uploaded in FP code segment. */ + prog_type = 1; + } else { + prog->code_base = prog->mem->start; + prog_type = prog->type; + } ret = nv50_tls_realloc(nv50->screen, prog->tls_space); if (ret < 0) { @@ -468,7 +486,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) false /* flatshade */); nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, - (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, + (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, NOUVEAU_BO_VRAM, prog->code_size, prog->code); BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index 7a33eb11d6d..f0016707163 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -98,6 +98,13 @@ struct nv50_program { ubyte viewportid; /* hw value of viewport index output */ } gp; + struct { + uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */ + uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */ + void *syms; + unsigned num_syms; + } cp; + void *fixups; /* relocation records */ void *interps; /* interpolation records */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index f47e998ab1e..0142e86ba20 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -41,8 +41,6 @@ #define THREADS_IN_WARP 32 -#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float)) - static boolean nv50_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, @@ -183,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_COMPUTE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ @@ -212,7 +211,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: - case PIPE_CAP_COMPUTE: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */ @@ -251,6 +249,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_FRAGMENT: + case PIPE_SHADER_COMPUTE: break; default: return 0; @@ -336,6 +335,52 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) return 0.0f; } +static int +nv50_screen_get_compute_param(struct pipe_screen *pscreen, + enum pipe_compute_cap param, void *data) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + +#define RET(x) do { \ + if (data) \ + memcpy(data, x, sizeof(x)); \ + return sizeof(x); \ +} while (0) + + switch (param) { + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + RET((uint64_t []) { 2 }); + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + RET(((uint64_t []) { 65535, 65535 })); + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + RET(((uint64_t []) { 512, 512, 64 })); + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + RET((uint64_t []) { 512 }); + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */ + RET((uint64_t []) { 1ULL << 32 }); + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ + RET((uint64_t []) { 16 << 10 }); + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ + RET((uint64_t []) { 16 << 10 }); + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ + RET((uint64_t []) { 4096 }); + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + RET((uint32_t []) { 32 }); + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + RET((uint64_t []) { 1ULL << 40 }); + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + RET((uint32_t []) { 0 }); + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint32_t []) { screen->mp_count }); + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */ + default: + return 0; + } + +#undef RET +} + static void nv50_screen_destroy(struct pipe_screen *pscreen) { @@ -377,6 +422,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->tesla); nouveau_object_del(&screen->eng2d); nouveau_object_del(&screen->m2mf); + nouveau_object_del(&screen->compute); nouveau_object_del(&screen->sync); nouveau_screen_fini(&screen->base); @@ -742,6 +788,7 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; + pscreen->get_compute_param = nv50_screen_get_compute_param; nv50_screen_init_resource_functions(pscreen); @@ -851,6 +898,8 @@ nv50_screen_create(struct nouveau_device *dev) screen->TPs = util_bitcount(value & 0xffff); screen->MPsInTP = util_bitcount((value >> 24) & 0xf); + screen->mp_count = screen->TPs * screen->MPsInTP; + stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP * STACK_WARPS_ALLOC * 64 * 8; @@ -902,6 +951,12 @@ nv50_screen_create(struct nouveau_device *dev) nv50_screen_init_hwctx(screen); + ret = nv50_screen_compute_setup(screen, screen->base.pushbuf); + if (ret) { + NOUVEAU_ERR("Failed to init compute context: %d\n", ret); + goto fail; + } + nouveau_fence_new(&screen->base, &screen->base.fence.current, false); return pscreen; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index ce51f0fc254..153ceea7a4f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -23,6 +23,10 @@ struct nv50_context; #define NV50_MAX_VIEWPORTS 16 +#define NV50_MAX_GLOBALS 16 + +#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float)) + struct nv50_blitter; struct nv50_graph_state { @@ -66,6 +70,7 @@ struct nv50_screen { unsigned MPsInTP; unsigned max_tls_space; unsigned cur_tls_space; + unsigned mp_count; struct nouveau_heap *vp_code_heap; struct nouveau_heap *gp_code_heap; @@ -93,6 +98,7 @@ struct nv50_screen { struct nouveau_object *sync; struct nouveau_object *tesla; + struct nouveau_object *compute; struct nouveau_object *eng2d; struct nouveau_object *m2mf; }; @@ -109,6 +115,8 @@ void nv50_blitter_destroy(struct nv50_screen *); int nv50_screen_tic_alloc(struct nv50_screen *, void *); int nv50_screen_tsc_alloc(struct nv50_screen *, void *); +int nv50_screen_compute_setup(struct nv50_screen *, struct nouveau_pushbuf *); + static inline void nv50_resource_fence(struct nv04_resource *res, uint32_t flags) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index d27f12ca94b..b4ea08d4d13 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -792,6 +792,35 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) nv50->dirty |= NV50_NEW_GMTYPROG; } +static void * +nv50_cp_state_create(struct pipe_context *pipe, + const struct pipe_compute_state *cso) +{ + struct nv50_program *prog; + + prog = CALLOC_STRUCT(nv50_program); + if (!prog) + return NULL; + prog->type = PIPE_SHADER_COMPUTE; + + prog->cp.smem_size = cso->req_local_mem; + prog->cp.lmem_size = cso->req_private_mem; + prog->parm_size = cso->req_input_mem; + + prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog); + + return (void *)prog; +} + +static void +nv50_cp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->compprog = hwcso; + nv50->dirty_cp |= NV50_NEW_CP_PROGRAM; +} + static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct pipe_constant_buffer *cb) @@ -1134,6 +1163,70 @@ nv50_set_stream_output_targets(struct pipe_context *pipe, nv50->dirty |= NV50_NEW_STRMOUT; } +static void +nv50_set_compute_resources(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_surface **resources) +{ + /* TODO: bind surfaces */ +} + +static inline void +nv50_set_global_handle(uint32_t *phandle, struct pipe_resource *res) +{ + struct nv04_resource *buf = nv04_resource(res); + if (buf) { + uint64_t limit = (buf->address + buf->base.width0) - 1; + if (limit < (1ULL << 32)) { + *phandle = (uint32_t)buf->address; + } else { + NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: " + "resource not contained within 32-bit address space !\n"); + *phandle = 0; + } + } else { + *phandle = 0; + } +} + +static void +nv50_set_global_bindings(struct pipe_context *pipe, + unsigned start, unsigned nr, + struct pipe_resource **resources, + uint32_t **handles) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct pipe_resource **ptr; + unsigned i; + const unsigned end = start + nr; + + if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource *))) { + const unsigned old_size = nv50->global_residents.size; + const unsigned req_size = end * sizeof(struct pipe_resource *); + util_dynarray_resize(&nv50->global_residents, req_size); + memset((uint8_t *)nv50->global_residents.data + old_size, 0, + req_size - old_size); + } + + if (resources) { + ptr = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) { + pipe_resource_reference(&ptr[i], resources[i]); + nv50_set_global_handle(handles[i], resources[i]); + } + } else { + ptr = util_dynarray_element( + &nv50->global_residents, struct pipe_resource *, start); + for (i = 0; i < nr; ++i) + pipe_resource_reference(&ptr[i], NULL); + } + + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL); + + nv50->dirty_cp = NV50_NEW_CP_GLOBALS; +} + void nv50_init_state_functions(struct nv50_context *nv50) { @@ -1162,12 +1255,15 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->create_vs_state = nv50_vp_state_create; pipe->create_fs_state = nv50_fp_state_create; pipe->create_gs_state = nv50_gp_state_create; + pipe->create_compute_state = nv50_cp_state_create; pipe->bind_vs_state = nv50_vp_state_bind; pipe->bind_fs_state = nv50_fp_state_bind; pipe->bind_gs_state = nv50_gp_state_bind; + pipe->bind_compute_state = nv50_cp_state_bind; pipe->delete_vs_state = nv50_sp_state_delete; pipe->delete_fs_state = nv50_sp_state_delete; pipe->delete_gs_state = nv50_sp_state_delete; + pipe->delete_compute_state = nv50_sp_state_delete; pipe->set_blend_color = nv50_set_blend_color; pipe->set_stencil_ref = nv50_set_stencil_ref; @@ -1191,6 +1287,9 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->stream_output_target_destroy = nv50_so_target_destroy; pipe->set_stream_output_targets = nv50_set_stream_output_targets; + pipe->set_global_binding = nv50_set_global_bindings; + pipe->set_compute_resources = nv50_set_compute_resources; + nv50->sample_mask = ~0; nv50->min_samples = 1; } |