summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h1
-rw-r--r--src/gallium/drivers/nouveau/nv_object.xml.h1
-rw-r--r--src/gallium/drivers/nv50/nv50_defs.xml.h60
-rw-r--r--src/gallium/drivers/nvc0/Makefile.sources1
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.c43
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.h71
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.c85
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.h9
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.c84
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.h12
-rw-r--r--src/gallium/drivers/nvc0/nvc0_shader_state.c10
-rw-r--r--src/gallium/drivers/nvc0/nvc0_state.c284
-rw-r--r--src/gallium/drivers/nvc0/nvc0_state_validate.c16
-rw-r--r--src/gallium/drivers/nvc0/nvc0_surface.c2
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tex.c295
-rw-r--r--src/gallium/drivers/nvc0/nve4_compute.c607
-rw-r--r--src/gallium/drivers/nvc0/nve4_compute.h110
-rw-r--r--src/gallium/drivers/nvc0/nve4_compute.xml.h269
18 files changed, 1882 insertions, 78 deletions
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 1de3fa65f5d..d5bc8171bb5 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -5,6 +5,7 @@
#include "util/u_memory.h"
typedef uint32_t u32;
+typedef uint16_t u16;
extern int nouveau_mesa_debug;
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index 66ba61b4622..2fd52acf36a 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -196,6 +196,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_COMPUTE_CLASS 0x000090c0
#define NVC8_COMPUTE_CLASS 0x000092c0
#define NVE4_COMPUTE_CLASS 0x0000a0c0
+#define NVF0_COMPUTE_CLASS 0x0000a1c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
diff --git a/src/gallium/drivers/nv50/nv50_defs.xml.h b/src/gallium/drivers/nv50/nv50_defs.xml.h
index 27046e9e564..2e42843fa56 100644
--- a/src/gallium/drivers/nv50/nv50_defs.xml.h
+++ b/src/gallium/drivers/nv50/nv50_defs.xml.h
@@ -1,5 +1,5 @@
-#ifndef RNNDB_NV50_DEFS_XML
-#define RNNDB_NV50_DEFS_XML
+#ifndef NV50_DEFS_XML
+#define NV50_DEFS_XML
/* Autogenerated file, DO NOT EDIT manually!
@@ -8,11 +8,11 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58)
-- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58)
-- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-09 13:43:58)
+- rnndb/nv50_defs.xml ( 7783 bytes, from 2013-02-14 13:56:25)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nvchipsets.xml ( 3704 bytes, from 2012-08-18 12:48:55)
-Copyright (C) 2006-2011 by the following authors:
+Copyright (C) 2006-2013 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <koala_br@users.sourceforge.net> (koala_br)
@@ -71,6 +71,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#define NV50_VSTATUS_IDLE 0x00000000
+#define NV50_VSTATUS_BUSY 0x00000001
+#define NV50_VSTATUS_UNK2 0x00000002
+#define NV50_VSTATUS_WAITING 0x00000003
+#define NV50_VSTATUS_BLOCKED 0x00000005
+#define NV50_VSTATUS_FAULTED 0x00000006
+#define NV50_VSTATUS_PAUSED 0x00000007
#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c
#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d
#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0
@@ -143,6 +150,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d
#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e
#define NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f
+#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002
+#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003
+#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004
+#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008
+#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009
+#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a
+#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b
+#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c
+#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d
+#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e
+#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f
+#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013
+#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015
+#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018
+#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a
+#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b
+#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c
+#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d
+#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e
+#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f
+#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020
+#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021
+#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024
+#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027
+#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028
+#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029
+#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e
+#define NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f
+#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030
+#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031
+#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032
+#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033
+#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034
+#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035
+#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036
+#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037
+#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038
+#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039
+#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a
#define NV50_QUERY__SIZE 0x00000010
#define NV50_QUERY_COUNTER 0x00000000
@@ -151,4 +197,4 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_QUERY_TIME 0x00000008
-#endif /* RNNDB_NV50_DEFS_XML */
+#endif /* NV50_DEFS_XML */
diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources
index 33b90f290fb..db8d12347b0 100644
--- a/src/gallium/drivers/nvc0/Makefile.sources
+++ b/src/gallium/drivers/nvc0/Makefile.sources
@@ -14,6 +14,7 @@ C_SOURCES := \
nvc0_program.c \
nvc0_shader_state.c \
nvc0_query.c \
+ nve4_compute.c \
nvc0_video.c \
nvc0_video_bsp.c \
nvc0_video_vp.c \
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 75bd1551b8f..dc0c4b922db 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -63,6 +63,7 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
nouveau_bufctx_del(&nvc0->bufctx_3d);
nouveau_bufctx_del(&nvc0->bufctx);
+ nouveau_bufctx_del(&nvc0->bufctx_cp);
util_unreference_framebuffer_state(&nvc0->framebuffer);
@@ -71,7 +72,7 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
- for (s = 0; s < 5; ++s) {
+ for (s = 0; s < 6; ++s) {
for (i = 0; i < nvc0->num_textures[s]; ++i)
pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
@@ -80,8 +81,21 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, NULL);
}
+ for (s = 0; s < 2; ++s) {
+ for (i = 0; i < NVC0_MAX_SURFACE_SLOTS; ++i)
+ pipe_surface_reference(&nvc0->surfaces[s][i], NULL);
+ }
+
for (i = 0; i < nvc0->num_tfbbufs; ++i)
pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
+
+ for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource **res = util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, i);
+ pipe_resource_reference(res, NULL);
+ }
+ util_dynarray_fini(&nvc0->global_residents);
}
static void
@@ -219,10 +233,13 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
nvc0->base.pushbuf = screen->base.pushbuf;
nvc0->base.client = screen->base.client;
- ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_COUNT,
- &nvc0->bufctx_3d);
+ ret = nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_3D_COUNT,
+ &nvc0->bufctx_3d);
if (!ret)
- nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx);
+ ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_CP_COUNT,
+ &nvc0->bufctx_cp);
if (ret)
goto out_err;
@@ -236,6 +253,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
pipe->draw_vbo = nvc0_draw_vbo;
pipe->clear = nvc0_clear;
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+ pipe->launch_grid = nve4_launch_grid;
pipe->flush = nvc0_flush;
pipe->texture_barrier = nvc0_texture_barrier;
@@ -274,23 +293,39 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc);
+ if (screen->compute) {
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text);
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm);
+ }
+
+ flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache);
+ if (screen->compute)
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls);
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo);
BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo);
+ if (screen->compute)
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
nvc0->base.scratch.bo_size = 2 << 20;
memset(nvc0->tex_handles, ~0, sizeof(nvc0->tex_handles));
+ util_dynarray_init(&nvc0->global_residents);
+
return pipe;
out_err:
if (nvc0) {
if (nvc0->bufctx_3d)
nouveau_bufctx_del(&nvc0->bufctx_3d);
+ if (nvc0->bufctx_cp)
+ nouveau_bufctx_del(&nvc0->bufctx_cp);
if (nvc0->bufctx)
nouveau_bufctx_del(&nvc0->bufctx);
if (nvc0->blit)
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index f5b0b6b849e..d9aa3788cb0 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -55,7 +55,16 @@
#define NVC0_NEW_SAMPLERS (1 << 20)
#define NVC0_NEW_TFB_TARGETS (1 << 21)
#define NVC0_NEW_IDXBUF (1 << 22)
+#define NVC0_NEW_SURFACES (1 << 23)
+#define NVC0_NEW_CP_PROGRAM (1 << 0)
+#define NVC0_NEW_CP_SURFACES (1 << 1)
+#define NVC0_NEW_CP_TEXTURES (1 << 2)
+#define NVC0_NEW_CP_SAMPLERS (1 << 3)
+#define NVC0_NEW_CP_CONSTBUF (1 << 4)
+#define NVC0_NEW_CP_GLOBALS (1 << 5)
+
+/* 3d bufctx (during draw_vbo, blit_3d) */
#define NVC0_BIND_FB 0
#define NVC0_BIND_VTX 1
#define NVC0_BIND_VTX_TMP 2
@@ -63,10 +72,21 @@
#define NVC0_BIND_TEX(s, i) ( 4 + 32 * (s) + (i))
#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
#define NVC0_BIND_TFB 244
-#define NVC0_BIND_SCREEN 245
-#define NVC0_BIND_TLS 246
-#define NVC0_BIND_COUNT 247
-
+#define NVC0_BIND_SUF 245
+#define NVC0_BIND_SCREEN 246
+#define NVC0_BIND_TLS 247
+#define NVC0_BIND_3D_COUNT 248
+
+/* compute bufctx (during launch_grid) */
+#define NVC0_BIND_CP_CB(i) ( 0 + (i))
+#define NVC0_BIND_CP_TEX(i) ( 16 + (i))
+#define NVC0_BIND_CP_SUF 48
+#define NVC0_BIND_CP_GLOBAL 49
+#define NVC0_BIND_CP_DESC 50
+#define NVC0_BIND_CP_SCREEN 51
+#define NVC0_BIND_CP_COUNT 52
+
+/* bufctx for other operations */
#define NVC0_BIND_2D 0
#define NVC0_BIND_M2MF 0
#define NVC0_BIND_FENCE 1
@@ -81,6 +101,7 @@ struct nvc0_context {
struct nouveau_bufctx *bufctx_3d;
struct nouveau_bufctx *bufctx;
+ struct nouveau_bufctx *bufctx_cp;
struct nvc0_screen *screen;
@@ -90,6 +111,7 @@ struct nvc0_context {
uint32_t nblocksx, uint32_t nblocksy);
uint32_t dirty;
+ uint32_t dirty_cp; /* dirty flags for compute state */
struct {
boolean flushed;
@@ -105,8 +127,8 @@ struct nvc0_context {
uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
- uint8_t num_textures[5];
- uint8_t num_samplers[5];
+ uint8_t num_textures[6];
+ uint8_t num_samplers[6];
uint8_t tls_required; /* bitmask of shader types using l[] */
uint8_t c14_bound; /* whether immediate array constbuf is bound */
uint8_t clip_enable;
@@ -125,9 +147,10 @@ struct nvc0_context {
struct nvc0_program *tevlprog;
struct nvc0_program *gmtyprog;
struct nvc0_program *fragprog;
+ struct nvc0_program *compprog;
- struct nvc0_constbuf constbuf[5][NVC0_MAX_PIPE_CONSTBUFS];
- uint16_t constbuf_dirty[5];
+ struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
+ uint16_t constbuf_dirty[6];
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned num_vtxbufs;
@@ -139,14 +162,14 @@ struct nvc0_context {
uint32_t instance_off; /* current base vertex for instanced arrays */
uint32_t instance_max; /* last instance for current draw call */
- struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS];
- unsigned num_textures[5];
- uint32_t textures_dirty[5];
- struct nv50_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS];
- unsigned num_samplers[5];
- uint16_t samplers_dirty[5];
+ struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS];
+ unsigned num_textures[6];
+ uint32_t textures_dirty[6];
+ struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
+ unsigned num_samplers[6];
+ uint16_t samplers_dirty[6];
- uint32_t tex_handles[5][PIPE_MAX_SAMPLERS]; /* for nve4 */
+ uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
struct pipe_framebuffer_state framebuffer;
struct pipe_blend_color blend_colour;
@@ -169,6 +192,12 @@ struct nvc0_context {
struct nvc0_blitctx *blit;
+ struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
+ uint16_t surfaces_dirty[2];
+ uint16_t surfaces_valid[2];
+
+ struct util_dynarray global_residents;
+
#ifdef NVC0_WITH_DRAW_MODULE
struct draw_context *draw;
#endif
@@ -211,6 +240,8 @@ boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_library_upload(struct nvc0_context *);
+uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
+ uint32_t label);
/* nvc0_query.c */
void nvc0_init_query_functions(struct nvc0_context *);
@@ -236,6 +267,8 @@ void nvc0_tfb_validate(struct nvc0_context *);
extern void nvc0_init_state_functions(struct nvc0_context *);
/* nvc0_state_validate.c */
+void nvc0_validate_global_residents(struct nvc0_context *,
+ struct nouveau_bufctx *, int bin);
extern boolean nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
unsigned space_words);
@@ -246,9 +279,13 @@ extern void nvc0_clear(struct pipe_context *, unsigned buffers,
extern void nvc0_init_surface_functions(struct nvc0_context *);
/* nvc0_tex.c */
+boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s);
void nvc0_validate_textures(struct nvc0_context *);
void nvc0_validate_samplers(struct nvc0_context *);
void nve4_set_tex_handles(struct nvc0_context *);
+void nvc0_validate_surfaces(struct nvc0_context *);
+void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_surface *,
+ struct nvc0_screen *);
struct pipe_sampler_view *
nvc0_create_texture_view(struct pipe_context *,
@@ -315,4 +352,8 @@ nvc0_screen_get_video_param(struct pipe_screen *pscreen,
/* nvc0_push.c */
void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
+/* nve4_compute.c */
+void nve4_launch_grid(struct pipe_context *,
+ const uint *, const uint *, uint32_t, const void *);
+
#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index e4ac8ba8e2c..592d338f446 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -25,6 +25,7 @@
#include "nvc0_context.h"
#include "nv50/codegen/nv50_ir_driver.h"
+#include "nve4_compute.h"
/* If only they told use the actual semantic instead of just GENERIC ... */
static void
@@ -533,10 +534,11 @@ nvc0_program_dump(struct nvc0_program *prog)
{
unsigned pos;
- for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
- debug_printf("HDR[%02lx] = 0x%08x\n",
- pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
-
+ if (prog->type != PIPE_SHADER_COMPUTE) {
+ for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
+ debug_printf("HDR[%02lx] = 0x%08x\n",
+ pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
+ }
debug_printf("shader binary code (0x%x bytes):", prog->code_size);
for (pos = 0; pos < prog->code_size / 4; ++pos) {
if ((pos % 8) == 0)
@@ -569,11 +571,11 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
info->io.resInfoCBSlot = 0;
- info->io.texBindBase = 0; /* TODO */
- info->io.suInfoBase = 0; /* TODO */
+ info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
+ info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
}
info->io.msInfoCBSlot = 0;
- info->io.msInfoBase = 0; /* TODO */
+ info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
} else {
if (chipset >= NVISA_GK104_CHIPSET) {
info->io.resInfoCBSlot = 15;
@@ -598,14 +600,16 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
NOUVEAU_ERR("shader translation failed: %i\n", ret);
goto out;
}
- FREE(info->bin.syms);
+ if (prog->type != PIPE_SHADER_COMPUTE)
+ FREE(info->bin.syms);
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
prog->immd_data = info->immd.buf;
prog->immd_size = info->immd.bufSize;
prog->relocs = info->bin.relocData;
- prog->max_gpr = MAX2(4, (info->bin.maxGPR + 1));
+ prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
+ prog->num_barriers = info->numBarriers;
prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
@@ -633,6 +637,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
case PIPE_SHADER_FRAGMENT:
ret = nvc0_fp_gen_header(prog, info);
break;
+ case PIPE_SHADER_COMPUTE:
+ prog->cp.syms = info->bin.syms;
+ prog->cp.num_syms = info->bin.numSyms;
+ break;
default:
ret = -1;
NOUVEAU_ERR("unknown program type: %u\n", prog->type);
@@ -672,8 +680,9 @@ boolean
nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
struct nvc0_screen *screen = nvc0->screen;
+ const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE;
int ret;
- uint32_t size = prog->code_size + NVC0_SHADER_HEADER_SIZE;
+ uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
uint32_t lib_pos = screen->lib_code->start;
uint32_t code_pos;
@@ -689,7 +698,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
* latency information is expected only at certain positions.
*/
if (screen->base.class_3d >= NVE4_3D_CLASS)
- size = size + 0x70;
+ size = size + (is_cp ? 0x40 : 0x70);
size = align(size, 0x40);
ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
@@ -714,18 +723,27 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <=
prog->mem->start + prog->mem->size));
- if (screen->base.class_3d >= NVE4_3D_CLASS) {
- switch (prog->mem->start & 0xff) {
- case 0x40: prog->code_base += 0x70; break;
- case 0x80: prog->code_base += 0x30; break;
- case 0xc0: prog->code_base += 0x70; break;
- default:
- prog->code_base += 0x30;
- assert((prog->mem->start & 0xff) == 0x00);
- break;
+ if (!is_cp) {
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ switch (prog->mem->start & 0xff) {
+ case 0x40: prog->code_base += 0x70; break;
+ case 0x80: prog->code_base += 0x30; break;
+ case 0xc0: prog->code_base += 0x70; break;
+ default:
+ prog->code_base += 0x30;
+ assert((prog->mem->start & 0xff) == 0x00);
+ break;
+ }
+ }
+ code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
+ } else {
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ if (prog->mem->start & 0x40)
+ prog->code_base += 0x40;
+ assert((prog->code_base & 0x7f) == 0x00);
}
+ code_pos = prog->code_base;
}
- code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
if (prog->relocs)
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
@@ -735,10 +753,10 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
nvc0_program_dump(prog);
#endif
- nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
- NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
- nvc0->base.push_data(&nvc0->base, screen->text,
- prog->code_base + NVC0_SHADER_HEADER_SIZE,
+ if (!is_cp)
+ nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
+ NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
NOUVEAU_BO_VRAM, prog->code_size, prog->code);
if (prog->immd_size)
nvc0->base.push_data(&nvc0->base,
@@ -790,6 +808,8 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
FREE(prog->code);
FREE(prog->immd_data);
FREE(prog->relocs);
+ if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)
+ FREE(prog->cp.syms);
if (prog->tfb) {
if (nvc0->state.tfb == prog->tfb)
nvc0->state.tfb = NULL;
@@ -801,3 +821,18 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
prog->pipe = pipe;
prog->type = type;
}
+
+uint32_t
+nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
+{
+ const struct nv50_ir_prog_symbol *syms =
+ (const struct nv50_ir_prog_symbol *)prog->cp.syms;
+ unsigned base = 0;
+ unsigned i;
+ if (prog->type != PIPE_SHADER_COMPUTE)
+ base = NVC0_SHADER_HEADER_SIZE;
+ for (i = 0; i < prog->cp.num_syms; ++i)
+ if (syms[i].label == label)
+ return prog->code_base + base + syms[i].offset;
+ return ~0;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
index f6d1121c6dd..9c184d1f1d5 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nvc0/nvc0_program.h
@@ -22,7 +22,7 @@ struct nvc0_program {
ubyte type;
boolean translated;
boolean need_tls;
- uint8_t max_gpr;
+ uint8_t num_gprs;
uint32_t *code;
uint32_t *immd_data;
@@ -50,6 +50,13 @@ struct nvc0_program {
uint32_t tess_mode; /* ~0 if defined by the other stage */
uint32_t input_patch_size;
} tp;
+ struct {
+ uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
+ uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
+ void *syms;
+ unsigned num_syms;
+ } cp;
+ uint8_t num_barriers;
void *relocs;
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index 077f89efef9..7d034797eef 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -88,12 +88,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
switch (param) {
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return 16 * PIPE_SHADER_TYPES; /* NOTE: should not count COMPUTE */
+ return 16 * 5;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return 15;
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- return 12;
+ return (class_3d >= NVE4_3D_CLASS) ? 13 : 12;
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
return 2048;
case PIPE_CAP_MIN_TEXEL_OFFSET:
@@ -176,6 +176,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
return 0;
+ case PIPE_CAP_COMPUTE:
+ return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
default:
NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
return 0;
@@ -186,6 +188,8 @@ static int
nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
enum pipe_shader_cap param)
{
+ const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
switch (shader) {
case PIPE_SHADER_VERTEX:
/*
@@ -195,11 +199,17 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
break;
+ case PIPE_SHADER_COMPUTE:
+ if (class_3d < NVE4_3D_CLASS)
+ return 0;
+ break;
default:
return 0;
}
switch (param) {
+ case PIPE_SHADER_CAP_PREFERRED_IR:
+ return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
@@ -216,6 +226,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_MAX_CONSTS:
return 65536 / 16;
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS)
+ return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE;
return NVC0_MAX_PIPE_CONSTBUFS;
case PIPE_SHADER_CAP_MAX_ADDRS:
return 1;
@@ -234,7 +246,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_SUBROUTINES:
- return 1; /* but inlining everything, we need function declarations */
+ return 1;
case PIPE_SHADER_CAP_INTEGERS:
return 1;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
@@ -270,6 +282,47 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
}
}
+static int
+nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
+ enum pipe_compute_cap param, void *data)
+{
+ uint64_t *data64 = (uint64_t *)data;
+ const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ data64[0] = 3;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535;
+ data64[1] = 65535;
+ data64[2] = 65535;
+ return 24;
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ data64[0] = 1024;
+ data64[1] = 1024;
+ data64[2] = 64;
+ return 24;
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ data64[0] = 1024;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
+ data64[0] = (uint64_t)1 << 40;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+ data64[0] = 48 << 10;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+ data64[0] = 512 << 10;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+ data64[0] = 4096;
+ return 8;
+ default:
+ return 0;
+ }
+}
+
static void
nvc0_screen_destroy(struct pipe_screen *pscreen)
{
@@ -291,6 +344,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
nouveau_bo_ref(NULL, &screen->txc);
nouveau_bo_ref(NULL, &screen->fence.bo);
nouveau_bo_ref(NULL, &screen->poly_cache);
+ nouveau_bo_ref(NULL, &screen->parm);
nouveau_heap_destroy(&screen->lib_code);
nouveau_heap_destroy(&screen->text_heap);
@@ -412,6 +466,23 @@ nvc0_screen_fence_update(struct pipe_screen *pscreen)
return screen->fence.map[0];
}
+static int
+nvc0_screen_init_compute(struct nvc0_screen *screen)
+{
+ screen->base.base.get_compute_param = nvc0_screen_get_compute_param;
+
+ switch (screen->base.device->chipset & 0xf0) {
+ case 0xc0:
+ case 0xd0:
+ return 0;
+ case 0xe0:
+ case 0xf0:
+ return nve4_screen_compute_setup(screen, screen->base.pushbuf);
+ default:
+ return -1;
+ }
+}
+
#define FAIL_SCREEN_INIT(str, err) \
do { \
NOUVEAU_ERR(str, err); \
@@ -653,9 +724,9 @@ nvc0_screen_create(struct nouveau_device *dev)
/* max MPs * max warps per MP (TODO: ask kernel) */
if (screen->eng3d->oclass >= NVE4_3D_CLASS)
- screen->tls_size = 8 * 64;
+ screen->tls_size = 8 * 64 * 32;
else
- screen->tls_size = 16 * 48;
+ screen->tls_size = 16 * 48 * 32;
screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16;
screen->tls_size = align(screen->tls_size, 1 << 17);
@@ -775,6 +846,9 @@ nvc0_screen_create(struct nouveau_device *dev)
IMMED_NVC0(push, NVC0_3D(EDGEFLAG), 1);
+ if (nvc0_screen_init_compute(screen))
+ goto fail;
+
PUSH_KICK (push);
screen->tic.entries = CALLOC(4096, sizeof(void *));
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index 2adcfeac3ef..16f0febd3ea 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -15,7 +15,10 @@
#define NVC0_TSC_MAX_ENTRIES 2048
/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
-#define NVC0_MAX_PIPE_CONSTBUFS 14
+#define NVC0_MAX_PIPE_CONSTBUFS 14
+#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7
+
+#define NVC0_MAX_SURFACE_SLOTS 16
struct nvc0_context;
@@ -29,7 +32,8 @@ struct nvc0_screen {
int num_occlusion_queries_active;
struct nouveau_bo *text;
- struct nouveau_bo *uniform_bo;
+ struct nouveau_bo *parm; /* for COMPUTE */
+ struct nouveau_bo *uniform_bo; /* for 3D */
struct nouveau_bo *tls;
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
struct nouveau_bo *poly_cache;
@@ -63,7 +67,7 @@ struct nvc0_screen {
struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
struct nouveau_object *eng2d;
struct nouveau_object *m2mf;
- struct nouveau_object *dijkstra;
+ struct nouveau_object *compute;
};
static INLINE struct nvc0_screen *
@@ -80,6 +84,8 @@ void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
+int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
+
static INLINE void
nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
{
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
index 786889f8b57..5cd6a84be57 100644
--- a/src/gallium/drivers/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
@@ -95,7 +95,7 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, 0x11);
PUSH_DATA (push, vp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
- PUSH_DATA (push, vp->max_gpr);
+ PUSH_DATA (push, vp->num_gprs);
// BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
// PUSH_DATA (push, 0);
@@ -120,7 +120,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, 0x51);
PUSH_DATA (push, fp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
- PUSH_DATA (push, fp->max_gpr);
+ PUSH_DATA (push, fp->num_gprs);
BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
PUSH_DATA (push, 0x20164010);
@@ -144,7 +144,7 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, 0x21);
PUSH_DATA (push, tp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
- PUSH_DATA (push, tp->max_gpr);
+ PUSH_DATA (push, tp->num_gprs);
if (tp->tp.input_patch_size <= 32)
IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size);
@@ -171,7 +171,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
PUSH_DATA (push, tp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
- PUSH_DATA (push, tp->max_gpr);
+ PUSH_DATA (push, tp->num_gprs);
} else {
BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
PUSH_DATA (push, 0x30);
@@ -197,7 +197,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
PUSH_DATA (push, gp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
- PUSH_DATA (push, gp->max_gpr);
+ PUSH_DATA (push, gp->num_gprs);
BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
} else {
diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
index 30011df4dc1..cba076fb982 100644
--- a/src/gallium/drivers/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_state.c
@@ -489,6 +489,57 @@ nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
}
+static void
+nvc0_stage_sampler_states_bind_range(struct nvc0_context *nvc0,
+ const unsigned s,
+ unsigned start, unsigned nr, void **cso)
+{
+ const unsigned end = start + nr;
+ int last_valid = -1;
+ unsigned i;
+
+ if (cso) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (cso[p])
+ last_valid = i;
+ if (cso[p] == nvc0->samplers[s][i])
+ continue;
+ nvc0->samplers_dirty[s] |= 1 << i;
+
+ if (nvc0->samplers[s][i])
+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+ nvc0->samplers[s][i] = cso[p];
+ }
+ } else {
+ for (i = start; i < end; ++i) {
+ if (nvc0->samplers[s][i]) {
+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+ nvc0->samplers[s][i] = NULL;
+ nvc0->samplers_dirty[s] |= 1 << i;
+ }
+ }
+ }
+
+ if (nvc0->num_samplers[s] <= end) {
+ if (last_valid < 0) {
+ for (i = start; i && !nvc0->samplers[s][i - 1]; --i);
+ nvc0->num_samplers[s] = i;
+ } else {
+ nvc0->num_samplers[s] = last_valid + 1;
+ }
+ }
+}
+
+static void
+nvc0_cp_sampler_states_bind(struct pipe_context *pipe,
+ unsigned start, unsigned nr, void **cso)
+{
+ nvc0_stage_sampler_states_bind_range(nvc0_context(pipe), 5, start, nr, cso);
+
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
+}
+
/* NOTE: only called when not referenced anywhere, won't be bound */
static void
nvc0_sampler_view_destroy(struct pipe_context *pipe,
@@ -561,6 +612,67 @@ nvc0_gp_set_sampler_views(struct pipe_context *pipe,
nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
}
+static void
+nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s,
+ unsigned start, unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d;
+ const unsigned end = start + nr;
+ const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0);
+ int last_valid = -1;
+ unsigned i;
+
+ if (views) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (views[p])
+ last_valid = i;
+ if (views[p] == nvc0->textures[s][i])
+ continue;
+ nvc0->textures_dirty[s] |= 1 << i;
+
+ if (nvc0->textures[s][i]) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+ nouveau_bufctx_reset(bctx, bin + i);
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+ }
+ pipe_sampler_view_reference(&nvc0->textures[s][i], views[p]);
+ }
+ } else {
+ for (i = start; i < end; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+ if (!old)
+ continue;
+ nvc0->textures_dirty[s] |= 1 << i;
+
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
+ nouveau_bufctx_reset(bctx, bin + i);
+ }
+ }
+
+ if (nvc0->num_textures[s] <= end) {
+ if (last_valid < 0) {
+ for (i = start; i && !nvc0->textures[s][i - 1]; --i);
+ nvc0->num_textures[s] = i;
+ } else {
+ nvc0->num_textures[s] = last_valid + 1;
+ }
+ }
+}
+
+static void
+nvc0_cp_set_sampler_views(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views_range(nvc0_context(pipe), 5, start, nr, views);
+
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_TEXTURES;
+}
+
+
/* ============================= SHADERS =======================================
*/
@@ -644,6 +756,35 @@ nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
nvc0->dirty |= NVC0_NEW_GMTYPROG;
}
+static void *
+nvc0_cp_state_create(struct pipe_context *pipe,
+ const struct pipe_compute_state *cso)
+{
+ struct nvc0_program *prog;
+
+ prog = CALLOC_STRUCT(nvc0_program);
+ if (!prog)
+ return NULL;
+ prog->type = PIPE_SHADER_COMPUTE;
+
+ prog->cp.smem_size = cso->req_local_mem;
+ prog->cp.lmem_size = cso->req_private_mem;
+ prog->parm_size = cso->req_input_mem;
+
+ prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog);
+
+ return (void *)prog;
+}
+
+static void
+nvc0_cp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->compprog = hwcso;
+ nvc0->dirty_cp |= NVC0_NEW_CP_PROGRAM;
+}
+
static void
nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
struct pipe_constant_buffer *cb)
@@ -653,14 +794,22 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
const unsigned s = nvc0_shader_stage(shader);
const unsigned i = index;
- if (shader == PIPE_SHADER_COMPUTE)
- return;
+ if (unlikely(shader == PIPE_SHADER_COMPUTE)) {
+ assert(!cb || !cb->user_buffer);
+ if (nvc0->constbuf[s][i].u.buf)
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
- if (nvc0->constbuf[s][i].user)
- nvc0->constbuf[s][i].u.buf = NULL;
- else
- if (nvc0->constbuf[s][i].u.buf)
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+ nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
+ } else {
+ if (nvc0->constbuf[s][i].user)
+ nvc0->constbuf[s][i].u.buf = NULL;
+ else
+ if (nvc0->constbuf[s][i].u.buf)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+
+ nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ }
+ nvc0->constbuf_dirty[s] |= 1 << i;
pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
@@ -673,10 +822,6 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
nvc0->constbuf[s][i].offset = cb->buffer_offset;
nvc0->constbuf[s][i].size = align(cb->buffer_size, 0x100);
}
-
- nvc0->constbuf_dirty[s] |= 1 << i;
-
- nvc0->dirty |= NVC0_NEW_CONSTBUF;
}
/* =============================================================================
@@ -919,6 +1064,113 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
nvc0->dirty |= NVC0_NEW_TFB_TARGETS;
}
+static void
+nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t,
+ unsigned start, unsigned nr,
+ struct pipe_surface **psurfaces)
+{
+ const unsigned end = start + nr;
+ const unsigned mask = ((1 << nr) - 1) << start;
+ unsigned i;
+
+ if (psurfaces) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (psurfaces[p])
+ nvc0->surfaces_valid[t] |= (1 << i);
+ else
+ nvc0->surfaces_valid[t] &= ~(1 << i);
+ pipe_surface_reference(&nvc0->surfaces[t][i], psurfaces[p]);
+ }
+ } else {
+ for (i = start; i < end; ++i)
+ pipe_surface_reference(&nvc0->surfaces[t][i], NULL);
+ nvc0->surfaces_valid[t] &= ~mask;
+ }
+ nvc0->surfaces_dirty[t] |= mask;
+
+ if (t == 0)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF);
+ else
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
+}
+
+static void
+nvc0_set_compute_resources(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_surface **resources)
+{
+ nvc0_bind_surfaces_range(nvc0_context(pipe), 1, start, nr, resources);
+
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SURFACES;
+}
+
+static void
+nvc0_set_shader_resources(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_surface **resources)
+{
+ nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, resources);
+
+ nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
+}
+
+static INLINE void
+nvc0_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
+{
+ struct nv04_resource *buf = nv04_resource(res);
+ if (buf) {
+ uint64_t limit = (buf->address + buf->base.width0) - 1;
+ if (limit < (1ULL << 32)) {
+ *phandle = (uint32_t)buf->address;
+ } else {
+ NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: "
+ "resource not contained within 32-bit address space !\n");
+ *phandle = 0;
+ }
+ } else {
+ *phandle = 0;
+ }
+}
+
+static void
+nvc0_set_global_bindings(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_resource **resources,
+ uint32_t **handles)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct pipe_resource **ptr;
+ unsigned i;
+ const unsigned end = start + nr;
+
+ if (nvc0->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
+ const unsigned old_size = nvc0->global_residents.size;
+ const unsigned req_size = end * sizeof(struct pipe_resource *);
+ util_dynarray_resize(&nvc0->global_residents, req_size);
+ memset((uint8_t *)nvc0->global_residents.data + old_size, 0,
+ req_size - old_size);
+ }
+
+ if (resources) {
+ ptr = util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, start);
+ for (i = 0; i < nr; ++i) {
+ pipe_resource_reference(&ptr[i], resources[i]);
+ nvc0_set_global_handle(handles[i], resources[i]);
+ }
+ } else {
+ ptr = util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, start);
+ for (i = 0; i < nr; ++i)
+ pipe_resource_reference(&ptr[i], NULL);
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
+
+ nvc0->dirty_cp = NVC0_NEW_CP_GLOBALS;
+}
+
void
nvc0_init_state_functions(struct nvc0_context *nvc0)
{
@@ -941,12 +1193,14 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->bind_vertex_sampler_states = nvc0_vp_sampler_states_bind;
pipe->bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
pipe->bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
+ pipe->bind_compute_sampler_states = nvc0_cp_sampler_states_bind;
pipe->create_sampler_view = nvc0_create_sampler_view;
pipe->sampler_view_destroy = nvc0_sampler_view_destroy;
pipe->set_vertex_sampler_views = nvc0_vp_set_sampler_views;
pipe->set_fragment_sampler_views = nvc0_fp_set_sampler_views;
pipe->set_geometry_sampler_views = nvc0_gp_set_sampler_views;
+ pipe->set_compute_sampler_views = nvc0_cp_set_sampler_views;
pipe->create_vs_state = nvc0_vp_state_create;
pipe->create_fs_state = nvc0_fp_state_create;
@@ -958,6 +1212,10 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->delete_fs_state = nvc0_sp_state_delete;
pipe->delete_gs_state = nvc0_sp_state_delete;
+ pipe->create_compute_state = nvc0_cp_state_create;
+ pipe->bind_compute_state = nvc0_cp_state_bind;
+ pipe->delete_compute_state = nvc0_sp_state_delete;
+
pipe->set_blend_color = nvc0_set_blend_color;
pipe->set_stencil_ref = nvc0_set_stencil_ref;
pipe->set_clip_state = nvc0_set_clip_state;
@@ -978,5 +1236,9 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->create_stream_output_target = nvc0_so_target_create;
pipe->stream_output_target_destroy = nvc0_so_target_destroy;
pipe->set_stream_output_targets = nvc0_set_transform_feedback_targets;
+
+ pipe->set_global_binding = nvc0_set_global_bindings;
+ pipe->set_compute_resources = nvc0_set_compute_resources;
+ pipe->set_shader_resources = nvc0_set_shader_resources;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
index 80a8c01a51a..1f12de61e9f 100644
--- a/src/gallium/drivers/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
@@ -430,6 +430,21 @@ nvc0_validate_sample_mask(struct nvc0_context *nvc0)
PUSH_DATA (push, 0x01);
}
+void
+nvc0_validate_global_residents(struct nvc0_context *nvc0,
+ struct nouveau_bufctx *bctx, int bin)
+{
+ unsigned i;
+
+ for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource *res = *util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, i);
+ if (res)
+ nvc0_add_resident(bctx, bin, nv04_resource(res), NOUVEAU_BO_RDWR);
+ }
+}
+
static void
nvc0_validate_derived_1(struct nvc0_context *nvc0)
{
@@ -513,6 +528,7 @@ static struct state_validate {
{ nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
{ nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
+ { nvc0_validate_surfaces, NVC0_NEW_SURFACES },
{ nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }
};
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
index 281d740b218..77330c52ac5 100644
--- a/src/gallium/drivers/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -515,7 +515,7 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit)
blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */
blit->vp.code_size = sizeof(code_nvc0);
}
- blit->vp.max_gpr = 7;
+ blit->vp.num_gprs = 7;
blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
blit->vp.hdr[0] = 0x00020461; /* vertprog magic */
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
index 2bce97b32c7..7fbe1e6736b 100644
--- a/src/gallium/drivers/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -23,6 +23,7 @@
#include "nvc0_context.h"
#include "nvc0_resource.h"
#include "nv50/nv50_texture.xml.h"
+#include "nv50/nv50_defs.xml.h"
#include "util/u_format.h"
@@ -413,7 +414,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
return need_flush;
}
-static boolean
+boolean
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
{
struct nouveau_bo *txc = nvc0->screen->txc;
@@ -515,3 +516,295 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
nvc0->samplers_dirty[s] = 0;
}
}
+
+
+static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
+static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
+static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
+
+void
+nve4_set_surface_info(struct nouveau_pushbuf *push,
+ struct pipe_surface *psf,
+ struct nvc0_screen *screen)
+{
+ struct nv50_surface *sf = nv50_surface(psf);
+ struct nv04_resource *res;
+ uint64_t address;
+ uint32_t *const info = push->cur;
+ uint8_t log2cpp;
+
+ if (psf && !nve4_su_format_map[psf->format])
+ NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
+
+ push->cur += 16;
+
+ if (!psf || !nve4_su_format_map[psf->format]) {
+ memset(info, 0, 16 * sizeof(*info));
+
+ info[0] = 0xbadf0000;
+ info[1] = 0x80004000;
+ info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
+ screen->lib_code->start;
+ return;
+ }
+ res = nv04_resource(sf->base.texture);
+
+ address = res->address + sf->offset;
+
+ info[8] = sf->width;
+ info[9] = sf->height;
+ info[10] = sf->depth;
+ switch (res->base.target) {
+ case PIPE_TEXTURE_1D_ARRAY:
+ info[11] = 1;
+ break;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ info[11] = 2;
+ break;
+ case PIPE_TEXTURE_3D:
+ info[11] = 3;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ info[11] = 4;
+ break;
+ default:
+ info[11] = 0;
+ break;
+ }
+ log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12;
+
+ info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start;
+
+ /* limit in bytes for raw access */
+ info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1);
+
+ info[1] = nve4_su_format_map[sf->base.format];
+
+#if 0
+ switch (util_format_get_blocksizebits(sf->base.format)) {
+ case 16: info[1] |= 1 << 16; break;
+ case 32: info[1] |= 2 << 16; break;
+ case 64: info[1] |= 3 << 16; break;
+ case 128: info[1] |= 4 << 16; break;
+ default:
+ break;
+ }
+#else
+ info[1] |= log2cpp << 16;
+ info[1] |= 0x4000;
+ info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]);
+#endif
+
+ if (res->base.target == PIPE_BUFFER) {
+ info[0] = address >> 8;
+ info[2] = sf->width - 1;
+ info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+ info[3] = 0;
+ info[4] = 0;
+ info[5] = 0;
+ info[6] = 0;
+ info[7] = 0;
+ info[14] = 0;
+ info[15] = 0;
+ } else {
+ struct nv50_miptree *mt = nv50_miptree(&res->base);
+ struct nv50_miptree_level *lvl = &mt->level[sf->base.u.tex.level];
+ const unsigned z = sf->base.u.tex.first_layer;
+
+ if (z) {
+ if (mt->layout_3d) {
+ address += nvc0_mt_zslice_offset(mt, psf->u.tex.level, z);
+ /* doesn't work if z passes z-tile boundary */
+ assert(sf->depth == 1);
+ } else {
+ address += mt->layer_stride * z;
+ }
+ }
+ info[0] = address >> 8;
+ info[2] = sf->width - 1;
+ /* NOTE: this is really important: */
+ info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+ info[3] = (0x88 << 24) | (lvl->pitch / 64);
+ info[4] = sf->height - 1;
+ info[4] |= (lvl->tile_mode & 0x0f0) << 25;
+ info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
+ info[5] = mt->layer_stride >> 8;
+ info[6] = sf->depth - 1;
+ info[6] |= (lvl->tile_mode & 0xf00) << 21;
+ info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
+ info[7] = 0;
+ info[14] = mt->ms_x;
+ info[15] = mt->ms_y;
+ }
+}
+
+static INLINE void
+nvc0_update_surface_bindings(struct nvc0_context *nvc0)
+{
+ /* TODO */
+}
+
+static INLINE void
+nve4_update_surface_bindings(struct nvc0_context *nvc0)
+{
+ /* TODO */
+}
+
+void
+nvc0_validate_surfaces(struct nvc0_context *nvc0)
+{
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ nve4_update_surface_bindings(nvc0);
+ } else {
+ nvc0_update_surface_bindings(nvc0);
+ }
+}
+
+
+static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */
+ [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT,
+ [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT,
+ [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT,
+ [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT,
+ [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM,
+ [PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT,
+ [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT,
+ [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM,
+ [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM,
+ [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT,
+ [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT,
+ [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT,
+ [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT,
+ [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT,
+ [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT,
+ [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM,
+ [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT,
+ [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT,
+ [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM,
+ [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM,
+ [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT,
+ [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT,
+};
+
+/* Auxiliary format description values for surface instructions.
+ * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
+ */
+static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,
+
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
+
+ [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
+ [PIPE_FORMAT_R32G32_SINT] = 0x3433,
+ [PIPE_FORMAT_R32G32_UINT] = 0x3433,
+
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24, */
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
+
+ [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
+ [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
+ [PIPE_FORMAT_R16G16_SINT] = 0x2524,
+ [PIPE_FORMAT_R16G16_UINT] = 0x2524,
+ [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
+
+ [PIPE_FORMAT_R32_SINT] = 0x2024,
+ [PIPE_FORMAT_R32_UINT] = 0x2024,
+ [PIPE_FORMAT_R32_FLOAT] = 0x2024,
+
+ [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
+ [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
+ [PIPE_FORMAT_R8G8_SINT] = 0x1615,
+ [PIPE_FORMAT_R8G8_UINT] = 0x1615,
+
+ [PIPE_FORMAT_R16_UNORM] = 0x1115,
+ [PIPE_FORMAT_R16_SNORM] = 0x1115,
+ [PIPE_FORMAT_R16_SINT] = 0x1115,
+ [PIPE_FORMAT_R16_UINT] = 0x1115,
+ [PIPE_FORMAT_R16_FLOAT] = 0x1115,
+
+ [PIPE_FORMAT_R8_UNORM] = 0x0206,
+ [PIPE_FORMAT_R8_SNORM] = 0x0206,
+ [PIPE_FORMAT_R8_SINT] = 0x0206,
+ [PIPE_FORMAT_R8_UINT] = 0x0206
+};
+
+/* NOTE: These are hardcoded offsets for the shader library.
+ * TODO: Automate them.
+ */
+static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
+ [PIPE_FORMAT_R32G32_FLOAT] = 0x428,
+ [PIPE_FORMAT_R32G32_SINT] = 0x468,
+ [PIPE_FORMAT_R32G32_UINT] = 0x468,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530, */
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
+ [PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
+ [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
+ [PIPE_FORMAT_R16G16_UNORM] = 0x828,
+ [PIPE_FORMAT_R16G16_SNORM] = 0x890,
+ [PIPE_FORMAT_R16G16_SINT] = 0x8f0,
+ [PIPE_FORMAT_R16G16_UINT] = 0x948,
+ [PIPE_FORMAT_R16G16_FLOAT] = 0x998,
+ [PIPE_FORMAT_R32_FLOAT] = 0x9e8,
+ [PIPE_FORMAT_R32_SINT] = 0xa30,
+ [PIPE_FORMAT_R32_UINT] = 0xa30,
+ [PIPE_FORMAT_R8G8_UNORM] = 0xa78,
+ [PIPE_FORMAT_R8G8_SNORM] = 0xae0,
+ [PIPE_FORMAT_R8G8_UINT] = 0xb48,
+ [PIPE_FORMAT_R8G8_SINT] = 0xb98,
+ [PIPE_FORMAT_R16_UNORM] = 0xbe8,
+ [PIPE_FORMAT_R16_SNORM] = 0xc48,
+ [PIPE_FORMAT_R16_SINT] = 0xca0,
+ [PIPE_FORMAT_R16_UINT] = 0xce8,
+ [PIPE_FORMAT_R16_FLOAT] = 0xd30,
+ [PIPE_FORMAT_R8_UNORM] = 0xd88,
+ [PIPE_FORMAT_R8_SNORM] = 0xde0,
+ [PIPE_FORMAT_R8_SINT] = 0xe38,
+ [PIPE_FORMAT_R8_UINT] = 0xe88,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
+};
diff --git a/src/gallium/drivers/nvc0/nve4_compute.c b/src/gallium/drivers/nvc0/nve4_compute.c
new file mode 100644
index 00000000000..e823d210952
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nve4_compute.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright 2012 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
+ */
+
+#include "nvc0_context.h"
+#include "nve4_compute.h"
+
+#include "nv50/codegen/nv50_ir_driver.h"
+
+static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
+
+
+int
+nve4_screen_compute_setup(struct nvc0_screen *screen,
+ struct nouveau_pushbuf *push)
+{
+ struct nouveau_device *dev = screen->base.device;
+ struct nouveau_object *chan = screen->base.channel;
+ unsigned i;
+ int ret;
+ uint32_t obj_class;
+
+ switch (dev->chipset & 0xf0) {
+ case 0xf0:
+ obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
+ break;
+ case 0xe0:
+ obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
+ break;
+ default:
+ NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+ break;
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,
+ &screen->compute);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
+ return ret;
+ }
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL,
+ &screen->parm);
+ if (ret)
+ return ret;
+
+ BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->compute->oclass);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->offset);
+ /* No idea why there are 2. Divide size by 2 to be safe.
+ * Actually this might be per-MP TEMP size and looks like I'm only using
+ * 2 MPs instead of all 8.
+ */
+ BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(0)), 3);
+ PUSH_DATAh(push, screen->tls_size / 2);
+ PUSH_DATA (push, screen->tls_size / 2);
+ PUSH_DATA (push, 0xff);
+ BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(1)), 3);
+ PUSH_DATAh(push, screen->tls_size / 2);
+ PUSH_DATA (push, screen->tls_size / 2);
+ PUSH_DATA (push, 0xff);
+
+ /* Unified address space ? Who needs that ? Certainly not OpenCL.
+ *
+ * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
+ * accessible. We cannot prevent that at the moment, so expect failure.
+ */
+ BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1);
+ PUSH_DATA (push, 1 << 24);
+ BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1);
+ PUSH_DATA (push, 2 << 24);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1);
+ PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
+
+ /* NOTE: these do not affect the state used by the 3D object */
+ BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
+ BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
+
+ if (obj_class >= NVF0_COMPUTE_CLASS) {
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1);
+ PUSH_DATA (push, 0x100);
+ BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63);
+ for (i = 63; i >= 1; --i)
+ PUSH_DATA(push, 0x38000 | i);
+ IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
+ IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0);
+ }
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1);
+ PUSH_DATA (push, 0); /* does not interefere with 3D */
+
+ if (obj_class >= NVF0_COMPUTE_CLASS)
+ IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+
+ /* MS sample coordinate offsets: these do not work with _ALT modes ! */
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
+ PUSH_DATA (push, 64);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
+ PUSH_DATA (push, 0); /* 0 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1); /* 1 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0); /* 2 */
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 1); /* 3 */
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 2); /* 4 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 3); /* 5 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 2); /* 6 */
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 3); /* 7 */
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+
+ return 0;
+}
+
+
+static void
+nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv50_surface *sf;
+ struct nv04_resource *res;
+ uint32_t mask;
+ unsigned i;
+ const unsigned t = 1;
+
+ mask = nvc0->surfaces_dirty[t];
+ while (mask) {
+ i = ffs(mask) - 1;
+ mask &= ~(1 << i);
+
+ /*
+ * NVE4's surface load/store instructions receive all the information
+ * directly instead of via binding points, so we have to supply them.
+ */
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
+ PUSH_DATA (push, 64);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
+
+ nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);
+
+ sf = nv50_surface(nvc0->surfaces[t][i]);
+ if (sf) {
+ res = nv04_resource(sf->base.texture);
+
+ if (sf->base.writable)
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
+ else
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
+ }
+ }
+ if (nvc0->surfaces_dirty[t]) {
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+ }
+
+ /* re-reference non-dirty surfaces */
+ mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t];
+ while (mask) {
+ i = ffs(mask) - 1;
+ mask &= ~(1 << i);
+
+ sf = nv50_surface(nvc0->surfaces[t][i]);
+ res = nv04_resource(sf->base.texture);
+
+ if (sf->base.writable)
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
+ else
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
+ }
+
+ nvc0->surfaces_dirty[t] = 0;
+}
+
+
+/* Thankfully, textures with samplers follow the normal rules. */
+static void
+nve4_compute_validate_samplers(struct nvc0_context *nvc0)
+{
+ boolean need_flush = nve4_validate_tsc(nvc0, 5);
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+/* (Code duplicated at bottom for various non-convincing reasons.
+ * E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
+ * entries to avoid a subchannel switch.
+ * Same for texture cache flushes.
+ * Also, the bufctx differs, and more IFs in the 3D version looks ugly.)
+ */
+static void nve4_compute_validate_textures(struct nvc0_context *);
+
+static void
+nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ uint64_t address;
+ const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
+ unsigned i, n;
+ uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
+
+ if (!dirty)
+ return;
+ i = ffs(dirty) - 1;
+ n = util_logbase2(dirty) + 1 - i;
+ assert(n);
+
+ address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
+ PUSH_DATA (push, n * 4);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
+ PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+
+ nvc0->textures_dirty[s] = 0;
+ nvc0->samplers_dirty[s] = 0;
+}
+
+
+static boolean
+nve4_compute_validate_program(struct nvc0_context *nvc0)
+{
+ struct nvc0_program *prog = nvc0->compprog;
+
+ if (prog->mem)
+ return TRUE;
+
+ if (!prog->translated) {
+ prog->translated = nvc0_program_translate(
+ prog, nvc0->screen->base.device->chipset);
+ if (!prog->translated)
+ return FALSE;
+ }
+ if (unlikely(!prog->code_size))
+ return FALSE;
+
+ if (likely(prog->code_size)) {
+ if (nvc0_program_upload_code(nvc0, prog)) {
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CODE);
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+
+static boolean
+nve4_compute_state_validate(struct nvc0_context *nvc0)
+{
+ if (!nve4_compute_validate_program(nvc0))
+ return FALSE;
+ if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
+ nve4_compute_validate_textures(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
+ nve4_compute_validate_samplers(nvc0);
+ if (nvc0->dirty_cp & (NVC0_NEW_CP_TEXTURES | NVC0_NEW_CP_SAMPLERS))
+ nve4_compute_set_tex_handles(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_SURFACES)
+ nve4_compute_validate_surfaces(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS)
+ nvc0_validate_global_residents(nvc0,
+ nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
+
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
+ if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
+ return FALSE;
+ if (unlikely(nvc0->state.flushed))
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+
+ return TRUE;
+}
+
+
+static void
+nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *cp = nvc0->compprog;
+
+ if (!cp->parm_size)
+ return;
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset);
+ PUSH_DATA (push, screen->parm->offset);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
+ PUSH_DATA (push, cp->parm_size);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
+ PUSH_DATAp(push, input, cp->parm_size / 4);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+}
+
+static INLINE uint8_t
+nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
+{
+ if (shared_size > (32 << 10))
+ return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1;
+ if (shared_size > (16 << 10))
+ return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1;
+ return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;
+}
+
+static void
+nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
+ struct nve4_cp_launch_desc *desc,
+ uint32_t label,
+ const uint *block_layout,
+ const uint *grid_layout)
+{
+ const struct nvc0_screen *screen = nvc0->screen;
+ const struct nvc0_program *cp = nvc0->compprog;
+ unsigned i;
+
+ nve4_cp_launch_desc_init_default(desc);
+
+ desc->entry = nvc0_program_symbol_offset(cp, label);
+
+ desc->griddim_x = grid_layout[0];
+ desc->griddim_y = grid_layout[1];
+ desc->griddim_z = grid_layout[2];
+ desc->blockdim_x = block_layout[0];
+ desc->blockdim_y = block_layout[1];
+ desc->blockdim_z = block_layout[2];
+
+ desc->shared_size = align(cp->cp.smem_size, 0x100);
+ desc->local_size_p = align(cp->cp.lmem_size, 0x10);
+ desc->local_size_n = 0;
+ desc->cstack_size = 0x800;
+ desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);
+
+ desc->gpr_alloc = cp->num_gprs;
+ desc->bar_alloc = cp->num_barriers;
+
+ for (i = 0; i < 7; ++i) {
+ const unsigned s = 5;
+ if (nvc0->constbuf[s][i].u.buf)
+ nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
+ }
+ nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
+}
+
+static INLINE struct nve4_cp_launch_desc *
+nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
+ struct nouveau_bo **pbo, uint64_t *pgpuaddr)
+{
+ uint8_t *ptr = nouveau_scratch_get(nv, 512, pgpuaddr, pbo);
+ if (!ptr)
+ return NULL;
+ if (*pgpuaddr & 255) {
+ unsigned adj = 256 - (*pgpuaddr & 255);
+ ptr += adj;
+ *pgpuaddr += adj;
+ }
+ return (struct nve4_cp_launch_desc *)ptr;
+}
+
+void
+nve4_launch_grid(struct pipe_context *pipe,
+ const uint *block_layout, const uint *grid_layout,
+ uint32_t label,
+ const void *input)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nve4_cp_launch_desc *desc;
+ uint64_t desc_gpuaddr;
+ struct nouveau_bo *desc_bo;
+ int ret;
+
+ desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
+ if (!desc)
+ goto out;
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ desc_bo);
+
+ ret = !nve4_compute_state_validate(nvc0);
+ if (ret)
+ goto out;
+
+ nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
+ nve4_compute_dump_launch_desc(desc);
+
+ nve4_compute_upload_input(nvc0, input);
+
+ /* upload descriptor and flush */
+#if 0
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, desc_gpuaddr);
+ PUSH_DATA (push, desc_gpuaddr);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
+ PUSH_DATA (push, 256);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4));
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DESC);
+ PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE);
+#endif
+ BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1);
+ PUSH_DATA (push, desc_gpuaddr >> 8);
+ BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1);
+ PUSH_DATA (push, 0x3);
+ BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+
+out:
+ if (ret)
+ NOUVEAU_ERR("Failed to launch grid !\n");
+ nouveau_scratch_done(&nvc0->base);
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
+}
+
+
+#define NVE4_TIC_ENTRY_INVALID 0x000fffff
+
+static void
+nve4_compute_validate_textures(struct nvc0_context *nvc0)
+{
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const unsigned s = 5;
+ unsigned i;
+ uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
+ unsigned n[2] = { 0, 0 };
+
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+ struct nv04_resource *res;
+ const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+
+ if (!tic) {
+ nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+ continue;
+ }
+ res = nv04_resource(tic->pipe.texture);
+
+ if (tic->id < 0) {
+ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+ PUSH_SPACE(push, 16);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, txc->offset + (tic->id * 32));
+ PUSH_DATA (push, txc->offset + (tic->id * 32));
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
+ PUSH_DATA (push, 32);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
+ PUSH_DATAp(push, &tic->tic[0], 8);
+
+ commands[0][n[0]++] = (tic->id << 4) | 1;
+ } else
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ commands[1][n[1]++] = (tic->id << 4) | 1;
+ }
+ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
+ nvc0->tex_handles[s][i] |= tic->id;
+ if (dirty)
+ BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
+ }
+ for (; i < nvc0->state.num_textures[s]; ++i)
+ nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+
+ if (n[0]) {
+ BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]);
+ PUSH_DATAp(push, commands[0], n[0]);
+ }
+ if (n[1]) {
+ BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]);
+ PUSH_DATAp(push, commands[1], n[1]);
+ }
+
+ nvc0->state.num_textures[s] = nvc0->num_textures[s];
+}
+
+
+static const char *nve4_cache_split_name(unsigned value)
+{
+ switch (value) {
+ case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1";
+ case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1";
+ case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1";
+ default:
+ return "(invalid)";
+ }
+}
+
+static void
+nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
+{
+ const uint32_t *data = (const uint32_t *)desc;
+ unsigned i;
+ boolean zero = FALSE;
+
+ debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
+
+ for (i = 0; i < sizeof(*desc); i += 4) {
+ if (data[i / 4]) {
+ debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
+ zero = FALSE;
+ } else
+ if (!zero) {
+ debug_printf("...\n");
+ zero = TRUE;
+ }
+ }
+
+ debug_printf("entry = 0x%x\n", desc->entry);
+ debug_printf("grid dimensions = %ux%ux%u\n",
+ desc->griddim_x, desc->griddim_y, desc->griddim_z);
+ debug_printf("block dimensions = %ux%ux%u\n",
+ desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
+ debug_printf("s[] size: 0x%x\n", desc->shared_size);
+ debug_printf("l[] size: -0x%x / +0x%x\n",
+ desc->local_size_n, desc->local_size_p);
+ debug_printf("stack size: 0x%x\n", desc->cstack_size);
+ debug_printf("barrier count: %u\n", desc->bar_alloc);
+ debug_printf("$r count: %u\n", desc->gpr_alloc);
+ debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));
+
+ for (i = 0; i < 8; ++i) {
+ uint64_t address;
+ uint32_t size = desc->cb[i].size;
+ boolean valid = !!(desc->cb_mask & (1 << i));
+
+ address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
+
+ if (!valid && !address && !size)
+ continue;
+ debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
+ i, address, size, valid ? "" : " (invalid)");
+ }
+}
+
diff --git a/src/gallium/drivers/nvc0/nve4_compute.h b/src/gallium/drivers/nvc0/nve4_compute.h
new file mode 100644
index 00000000000..82a77480c35
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nve4_compute.h
@@ -0,0 +1,110 @@
+
+#ifndef NVE4_COMPUTE_H
+#define NVE4_COMPUTE_H
+
+#include "nv50/nv50_defs.xml.h"
+#include "nve4_compute.xml.h"
+
+/* Input space is implemented as c0[], to which we bind the screen->parm bo.
+ */
+#define NVE4_CP_INPUT_USER 0x0000
+#define NVE4_CP_INPUT_USER_LIMIT 0x1000
+#define NVE4_CP_INPUT_TEX(i) (0x1020 + (i) * 4)
+#define NVE4_CP_INPUT_TEX_STRIDE 4
+#define NVE4_CP_INPUT_TEX_MAX 32
+#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
+#define NVE4_CP_INPUT_SUF_STRIDE 64
+#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
+#define NVE4_CP_INPUT_SUF_MAX 32
+#define NVE4_CP_INPUT_SIZE 0x1900
+#define NVE4_CP_PARAM_SIZE 0x2000
+
+struct nve4_cp_launch_desc
+{
+ u32 unk0[8];
+ u32 entry;
+ u32 unk9[3];
+ u32 griddim_x : 31;
+ u32 unk12 : 1;
+ u16 griddim_y;
+ u16 griddim_z;
+ u32 unk14[3];
+ u16 shared_size; /* must be aligned to 0x100 */
+ u16 unk15;
+ u16 unk16;
+ u16 blockdim_x;
+ u16 blockdim_y;
+ u16 blockdim_z;
+ u32 cb_mask : 8;
+ u32 unk20_8 : 21;
+ u32 cache_split : 2;
+ u32 unk20_31 : 1;
+ u32 unk21[8];
+ struct {
+ u32 address_l;
+ u32 address_h : 8;
+ u32 reserved : 7;
+ u32 size : 17;
+ } cb[8];
+ u32 local_size_p : 20;
+ u32 unk45_20 : 7;
+ u32 bar_alloc : 5;
+ u32 local_size_n : 20;
+ u32 unk46_20 : 4;
+ u32 gpr_alloc : 8;
+ u32 cstack_size : 20;
+ u32 unk47_20 : 12;
+ u32 unk48[16];
+};
+
+#define NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA 0x41
+#define NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DESC 0x11
+#define NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL 0x1
+
+static INLINE void
+nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
+{
+ memset(desc, 0, sizeof(*desc));
+
+ desc->unk0[7] = 0xbc000000;
+ desc->unk9[2] = 0x44014000;
+ desc->unk47_20 = 0x300;
+}
+
+static INLINE void
+nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
+ unsigned index,
+ struct nouveau_bo *bo,
+ uint32_t base, uint16_t size)
+{
+ uint64_t address = bo->offset + base;
+
+ assert(index < 8);
+ assert(!(base & 0xff));
+ assert(size <= 65536);
+
+ desc->cb[index].address_l = address;
+ desc->cb[index].address_h = address >> 32;
+ desc->cb[index].size = size;
+
+ desc->cb_mask |= 1 << index;
+}
+
+static INLINE void
+nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc,
+ unsigned index,
+ const struct nvc0_constbuf *cb)
+{
+ assert(index < 8);
+
+ if (!cb->u.buf) {
+ desc->cb_mask &= ~(1 << index);
+ } else {
+ const struct nv04_resource *buf = nv04_resource(cb->u.buf);
+ assert(!cb->user);
+ nve4_cp_launch_desc_set_cb(desc, index,
+ buf->bo, buf->offset + cb->offset, cb->size);
+ }
+}
+
+#endif /* NVE4_COMPUTE_H */
diff --git a/src/gallium/drivers/nvc0/nve4_compute.xml.h b/src/gallium/drivers/nvc0/nve4_compute.xml.h
new file mode 100644
index 00000000000..e513ae7eb86
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nve4_compute.xml.h
@@ -0,0 +1,269 @@
+#ifndef NVE4_COMPUTE_XML
+#define NVE4_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nve4_compute.xml ( 6352 bytes, from 2013-03-10 14:59:45)
+- copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- nvchipsets.xml ( 3870 bytes, from 2013-03-08 12:41:50)
+- nv_object.xml ( 13238 bytes, from 2013-02-07 16:35:34)
+- nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12)
+- nv50_defs.xml ( 7783 bytes, from 2013-03-08 12:42:29)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Koƛcielnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVE4_COMPUTE_UPLOAD_SIZE 0x00000180
+
+#define NVE4_COMPUTE_UPLOAD_UNK0184 0x00000184
+
+#define NVE4_COMPUTE_UPLOAD_ADDRESS_HIGH 0x00000188
+
+#define NVE4_COMPUTE_UPLOAD_ADDRESS_LOW 0x0000018c
+
+#define NVE4_COMPUTE_UNK01A0 0x000001a0
+
+#define NVE4_COMPUTE_UNK01A4 0x000001a4
+
+#define NVE4_COMPUTE_UNK01A8 0x000001a8
+
+#define NVE4_COMPUTE_UNK01AC 0x000001ac
+
+#define NVE4_COMPUTE_UPLOAD_EXEC 0x000001b0
+
+#define NVE4_COMPUTE_UPLOAD_DATA 0x000001b4
+
+#define NVE4_COMPUTE_SHARED_BASE 0x00000214
+
+#define NVE4_COMPUTE_MEM_BARRIER 0x0000021c
+
+#define NVE4_COMPUTE_UNK0280 0x00000280
+
+#define NVE4_COMPUTE_UNK02B0 0x000002b0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS 0x000002b4
+#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS__SHR 8
+
+#define NVE4_COMPUTE_UNK02B8 0x000002b8
+
+#define NVE4_COMPUTE_LAUNCH 0x000002bc
+
+#define NVE4_COMPUTE_TEMP_SIZE(i0) (0x000002e4 + 0xc*(i0))
+#define NVE4_COMPUTE_TEMP_SIZE__ESIZE 0x0000000c
+#define NVE4_COMPUTE_TEMP_SIZE__LEN 0x00000002
+
+#define NVE4_COMPUTE_TEMP_SIZE_HIGH(i0) (0x000002e4 + 0xc*(i0))
+
+#define NVE4_COMPUTE_TEMP_SIZE_LOW(i0) (0x000002e8 + 0xc*(i0))
+
+#define NVE4_COMPUTE_TEMP_SIZE_MASK(i0) (0x000002ec + 0xc*(i0))
+
+#define NVE4_COMPUTE_UNK0310 0x00000310
+
+#define NVE4_COMPUTE_LOCAL_BASE 0x0000077c
+
+#define NVE4_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790
+
+#define NVE4_COMPUTE_TEMP_ADDRESS_LOW 0x00000794
+
+#define NVE4_COMPUTE_WATCHDOG_TIMER 0x00000de4
+
+#define NVE4_COMPUTE_LINKED_TSC 0x00001234
+
+#define NVE4_COMPUTE_TSC_FLUSH 0x00001330
+#define NVE4_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NVE4_COMPUTE_TIC_FLUSH 0x00001334
+#define NVE4_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NVE4_COMPUTE_TEX_CACHE_CTL 0x00001338
+#define NVE4_COMPUTE_TEX_CACHE_CTL_UNK0__MASK 0x00000007
+#define NVE4_COMPUTE_TEX_CACHE_CTL_UNK0__SHIFT 0
+#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0
+#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4
+
+#define NVE4_COMPUTE_COND_ADDRESS_HIGH 0x00001550
+
+#define NVE4_COMPUTE_COND_ADDRESS_LOW 0x00001554
+
+#define NVE4_COMPUTE_COND_MODE 0x00001558
+#define NVE4_COMPUTE_COND_MODE_NEVER 0x00000000
+#define NVE4_COMPUTE_COND_MODE_ALWAYS 0x00000001
+#define NVE4_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVE4_COMPUTE_COND_MODE_EQUAL 0x00000003
+#define NVE4_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVE4_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NVE4_COMPUTE_TSC_ADDRESS_LOW 0x00001560
+
+#define NVE4_COMPUTE_TSC_LIMIT 0x00001564
+
+#define NVE4_COMPUTE_TIC_ADDRESS_HIGH 0x00001574
+
+#define NVE4_COMPUTE_TIC_ADDRESS_LOW 0x00001578
+
+#define NVE4_COMPUTE_TIC_LIMIT 0x0000157c
+
+#define NVE4_COMPUTE_CODE_ADDRESS_HIGH 0x00001608
+
+#define NVE4_COMPUTE_CODE_ADDRESS_LOW 0x0000160c
+
+#define NVE4_COMPUTE_FLUSH 0x00001698
+#define NVE4_COMPUTE_FLUSH_CODE 0x00000001
+#define NVE4_COMPUTE_FLUSH_GLOBAL 0x00000010
+#define NVE4_COMPUTE_FLUSH_UNK8 0x00000100
+#define NVE4_COMPUTE_FLUSH_CB 0x00001000
+
+#define NVE4_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NVE4_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NVE4_COMPUTE_QUERY_SEQUENCE 0x00001b08
+
+#define NVE4_COMPUTE_QUERY_GET 0x00001b0c
+#define NVE4_COMPUTE_QUERY_GET_MODE__MASK 0x00000003
+#define NVE4_COMPUTE_QUERY_GET_MODE__SHIFT 0
+#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000
+#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003
+#define NVE4_COMPUTE_QUERY_GET_INTR 0x00100000
+#define NVE4_COMPUTE_QUERY_GET_SHORT 0x10000000
+
+#define NVE4_COMPUTE_TEX_CB_INDEX 0x00002608
+
+#define NVE4_COMPUTE_UNK260c 0x0000260c
+
+#define NVE4_COMPUTE_LAUNCH_DESC__SIZE 0x00000100
+#define NVE4_COMPUTE_LAUNCH_DESC_PROG_START 0x00000020
+
+#define NVE4_COMPUTE_LAUNCH_DESC_12 0x00000030
+#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__MASK 0x7fffffff
+#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__SHIFT 0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ 0x00000034
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__MASK 0x0000ffff
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__MASK 0xffff0000
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__SHIFT 16
+
+#define NVE4_COMPUTE_LAUNCH_DESC_17 0x00000044
+#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__MASK 0x0000ffff
+#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__SHIFT 0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_18 0x00000048
+#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__MASK 0xffff0000
+#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__SHIFT 16
+
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ 0x0000004c
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__MASK 0x0000ffff
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__MASK 0xffff0000
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__SHIFT 16
+
+#define NVE4_COMPUTE_LAUNCH_DESC_20 0x00000050
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__MASK 0x000000ff
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__MASK 0x60000000
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__SHIFT 29
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_16K_SHARED_48K_L1 0x20000000
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_32K_SHARED_32K_L1 0x40000000
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_48K_SHARED_16K_L1 0x60000000
+
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0(i0) (0x00000074 + 0x8*(i0))
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__ESIZE 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__LEN 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__MASK 0xffffffff
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__SHIFT 0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1(i0) (0x00000078 + 0x8*(i0))
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__ESIZE 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__LEN 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__MASK 0x000000ff
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__MASK 0xffff8000
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__SHIFT 15
+
+#define NVE4_COMPUTE_LAUNCH_DESC_45 0x000000b4
+#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__MASK 0x000fffff
+#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__MASK 0xf8000000
+#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__SHIFT 27
+
+#define NVE4_COMPUTE_LAUNCH_DESC_46 0x000000b8
+#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__MASK 0x000fffff
+#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__MASK 0x3f000000
+#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__SHIFT 24
+
+#define NVE4_COMPUTE_LAUNCH_DESC_47 0x000000bc
+#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__MASK 0x000fffff
+#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__SHIFT 0
+
+
+#endif /* NVE4_COMPUTE_XML */