summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/ddebug/dd_context.c28
-rw-r--r--src/gallium/drivers/ddebug/dd_pipe.h2
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h11
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h277
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/a4xx.xml.h1042
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_context.h2
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_query.c138
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h13
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h42
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query.h12
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_hw.c33
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_hw.h6
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c17
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.h1
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c1
-rw-r--r--src/gallium/drivers/ilo/ilo_gpgpu.c8
-rw-r--r--src/gallium/drivers/ilo/ilo_screen.c2
-rw-r--r--src/gallium/drivers/ilo/ilo_state.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_line.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c2
-rw-r--r--src/gallium/drivers/nouveau/Makefile.sources5
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.h2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp8
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp61
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp5
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp47
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp41
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp103
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp35
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp10
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp1
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_screen.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/g80_defs.xml.h279
-rw-r--r--src/gallium/drivers/nouveau/nv50/g80_texture.xml.h451
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.c16
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h4
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h263
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_formats.c744
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c12
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c9
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.h18
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c69
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c1
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h1
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c34
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_tex.c92
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h306
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_transfer.c6
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c11
-rw-r--r--src/gallium/drivers/nouveau/nv_object.xml.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h365
-rw-r--r--src/gallium/drivers/nouveau/nvc0/mme/Makefile2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme24
-rw-r--r--src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h19
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.c287
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.h1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c60
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h97
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_macros.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c40
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c51
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.h21
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c12
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c104
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c144
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c85
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_tex.c387
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c44
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_compute.c124
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_compute.h1
-rw-r--r--src/gallium/drivers/r300/r300_screen.c6
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c15
-rw-r--r--src/gallium/drivers/r600/r600_llvm.c4
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c3
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c1
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h3
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c11
-rw-r--r--src/gallium/drivers/radeon/radeon_elf_util.c26
-rw-r--r--src/gallium/drivers/radeon/radeon_elf_util.h14
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm.h7
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_util.c6
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c31
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c31
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c35
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h26
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c1784
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h155
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h2
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c383
-rw-r--r--src/gallium/drivers/svga/svga_draw.c10
-rw-r--r--src/gallium/drivers/svga/svga_screen.c9
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_vgpu10.c4
-rw-r--r--src/gallium/drivers/trace/tr_context.c228
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c60
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.h4
-rw-r--r--src/gallium/drivers/trace/tr_screen.c25
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_schedule.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c3
112 files changed, 7006 insertions, 2077 deletions
diff --git a/src/gallium/drivers/ddebug/dd_context.c b/src/gallium/drivers/ddebug/dd_context.c
index 3ae7764ff3f..9dfaa0af289 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -415,30 +415,6 @@ dd_context_sampler_view_destroy(struct pipe_context *_pipe,
pipe->sampler_view_destroy(pipe, view);
}
-static struct pipe_image_view *
-dd_context_create_image_view(struct pipe_context *_pipe,
- struct pipe_resource *resource,
- const struct pipe_image_view *templ)
-{
- struct pipe_context *pipe = dd_context(_pipe)->pipe;
- struct pipe_image_view *view =
- pipe->create_image_view(pipe, resource, templ);
-
- if (!view)
- return NULL;
- view->context = _pipe;
- return view;
-}
-
-static void
-dd_context_image_view_destroy(struct pipe_context *_pipe,
- struct pipe_image_view *view)
-{
- struct pipe_context *pipe = dd_context(_pipe)->pipe;
-
- pipe->image_view_destroy(pipe, view);
-}
-
static struct pipe_stream_output_target *
dd_context_create_stream_output_target(struct pipe_context *_pipe,
struct pipe_resource *res,
@@ -486,7 +462,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader,
static void
dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader,
unsigned start, unsigned num,
- struct pipe_image_view **views)
+ struct pipe_image_view *views)
{
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
@@ -744,8 +720,6 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
CTX_INIT(sampler_view_destroy);
CTX_INIT(create_surface);
CTX_INIT(surface_destroy);
- CTX_INIT(create_image_view);
- CTX_INIT(image_view_destroy);
CTX_INIT(transfer_map);
CTX_INIT(transfer_flush_region);
CTX_INIT(transfer_unmap);
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h
index 80098dcb644..c9bbd569abe 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -94,7 +94,7 @@ struct dd_context
struct pipe_constant_buffer constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
struct dd_state *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
- struct pipe_image_view *shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
+ struct pipe_image_view shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
struct pipe_shader_buffer shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
struct dd_state *velems;
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index d23111352b7..71ee55054d3 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index c4f253b836c..c6286a1f290 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -255,11 +256,273 @@ enum a3xx_color_fmt {
RB_R32G32B32A32_UINT = 59,
};
+enum a3xx_cp_perfcounter_select {
+ CP_ALWAYS_COUNT = 0,
+ CP_AHB_PFPTRANS_WAIT = 3,
+ CP_AHB_NRTTRANS_WAIT = 6,
+ CP_CSF_NRT_READ_WAIT = 8,
+ CP_CSF_I1_FIFO_FULL = 9,
+ CP_CSF_I2_FIFO_FULL = 10,
+ CP_CSF_ST_FIFO_FULL = 11,
+ CP_RESERVED_12 = 12,
+ CP_CSF_RING_ROQ_FULL = 13,
+ CP_CSF_I1_ROQ_FULL = 14,
+ CP_CSF_I2_ROQ_FULL = 15,
+ CP_CSF_ST_ROQ_FULL = 16,
+ CP_RESERVED_17 = 17,
+ CP_MIU_TAG_MEM_FULL = 18,
+ CP_MIU_NRT_WRITE_STALLED = 22,
+ CP_MIU_NRT_READ_STALLED = 23,
+ CP_ME_REGS_RB_DONE_FIFO_FULL = 26,
+ CP_ME_REGS_VS_EVENT_FIFO_FULL = 27,
+ CP_ME_REGS_PS_EVENT_FIFO_FULL = 28,
+ CP_ME_REGS_CF_EVENT_FIFO_FULL = 29,
+ CP_ME_MICRO_RB_STARVED = 30,
+ CP_AHB_RBBM_DWORD_SENT = 40,
+ CP_ME_BUSY_CLOCKS = 41,
+ CP_ME_WAIT_CONTEXT_AVAIL = 42,
+ CP_PFP_TYPE0_PACKET = 43,
+ CP_PFP_TYPE3_PACKET = 44,
+ CP_CSF_RB_WPTR_NEQ_RPTR = 45,
+ CP_CSF_I1_SIZE_NEQ_ZERO = 46,
+ CP_CSF_I2_SIZE_NEQ_ZERO = 47,
+ CP_CSF_RBI1I2_FETCHING = 48,
+};
+
+enum a3xx_gras_tse_perfcounter_select {
+ GRAS_TSEPERF_INPUT_PRIM = 0,
+ GRAS_TSEPERF_INPUT_NULL_PRIM = 1,
+ GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2,
+ GRAS_TSEPERF_CLIPPED_PRIM = 3,
+ GRAS_TSEPERF_NEW_PRIM = 4,
+ GRAS_TSEPERF_ZERO_AREA_PRIM = 5,
+ GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6,
+ GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7,
+ GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8,
+ GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9,
+ GRAS_TSEPERF_PRE_CLIP_PRIM = 10,
+ GRAS_TSEPERF_POST_CLIP_PRIM = 11,
+ GRAS_TSEPERF_WORKING_CYCLES = 12,
+ GRAS_TSEPERF_PC_STARVE = 13,
+ GRAS_TSERASPERF_STALL = 14,
+};
+
+enum a3xx_gras_ras_perfcounter_select {
+ GRAS_RASPERF_16X16_TILES = 0,
+ GRAS_RASPERF_8X8_TILES = 1,
+ GRAS_RASPERF_4X4_TILES = 2,
+ GRAS_RASPERF_WORKING_CYCLES = 3,
+ GRAS_RASPERF_STALL_CYCLES_BY_RB = 4,
+ GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5,
+ GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6,
+};
+
+enum a3xx_hlsq_perfcounter_select {
+ HLSQ_PERF_SP_VS_CONSTANT = 0,
+ HLSQ_PERF_SP_VS_INSTRUCTIONS = 1,
+ HLSQ_PERF_SP_FS_CONSTANT = 2,
+ HLSQ_PERF_SP_FS_INSTRUCTIONS = 3,
+ HLSQ_PERF_TP_STATE = 4,
+ HLSQ_PERF_QUADS = 5,
+ HLSQ_PERF_PIXELS = 6,
+ HLSQ_PERF_VERTICES = 7,
+ HLSQ_PERF_FS8_THREADS = 8,
+ HLSQ_PERF_FS16_THREADS = 9,
+ HLSQ_PERF_FS32_THREADS = 10,
+ HLSQ_PERF_VS8_THREADS = 11,
+ HLSQ_PERF_VS16_THREADS = 12,
+ HLSQ_PERF_SP_VS_DATA_BYTES = 13,
+ HLSQ_PERF_SP_FS_DATA_BYTES = 14,
+ HLSQ_PERF_ACTIVE_CYCLES = 15,
+ HLSQ_PERF_STALL_CYCLES_SP_STATE = 16,
+ HLSQ_PERF_STALL_CYCLES_SP_VS = 17,
+ HLSQ_PERF_STALL_CYCLES_SP_FS = 18,
+ HLSQ_PERF_STALL_CYCLES_UCHE = 19,
+ HLSQ_PERF_RBBM_LOAD_CYCLES = 20,
+ HLSQ_PERF_DI_TO_VS_START_SP0 = 21,
+ HLSQ_PERF_DI_TO_FS_START_SP0 = 22,
+ HLSQ_PERF_VS_START_TO_DONE_SP0 = 23,
+ HLSQ_PERF_FS_START_TO_DONE_SP0 = 24,
+ HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25,
+ HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26,
+ HLSQ_PERF_UCHE_LATENCY_CYCLES = 27,
+ HLSQ_PERF_UCHE_LATENCY_COUNT = 28,
+};
+
+enum a3xx_pc_perfcounter_select {
+ PC_PCPERF_VISIBILITY_STREAMS = 0,
+ PC_PCPERF_TOTAL_INSTANCES = 1,
+ PC_PCPERF_PRIMITIVES_PC_VPC = 2,
+ PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3,
+ PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4,
+ PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5,
+ PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6,
+ PC_PCPERF_VERTICES_TO_VFD = 7,
+ PC_PCPERF_REUSED_VERTICES = 8,
+ PC_PCPERF_CYCLES_STALLED_BY_VFD = 9,
+ PC_PCPERF_CYCLES_STALLED_BY_TSE = 10,
+ PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11,
+ PC_PCPERF_CYCLES_IS_WORKING = 12,
+};
+
+enum a3xx_rb_perfcounter_select {
+ RB_RBPERF_ACTIVE_CYCLES_ANY = 0,
+ RB_RBPERF_ACTIVE_CYCLES_ALL = 1,
+ RB_RBPERF_STARVE_CYCLES_BY_SP = 2,
+ RB_RBPERF_STARVE_CYCLES_BY_RAS = 3,
+ RB_RBPERF_STARVE_CYCLES_BY_MARB = 4,
+ RB_RBPERF_STALL_CYCLES_BY_MARB = 5,
+ RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6,
+ RB_RBPERF_RB_MARB_DATA = 7,
+ RB_RBPERF_SP_RB_QUAD = 8,
+ RB_RBPERF_RAS_EARLY_Z_QUADS = 9,
+ RB_RBPERF_GMEM_CH0_READ = 10,
+ RB_RBPERF_GMEM_CH1_READ = 11,
+ RB_RBPERF_GMEM_CH0_WRITE = 12,
+ RB_RBPERF_GMEM_CH1_WRITE = 13,
+ RB_RBPERF_CP_CONTEXT_DONE = 14,
+ RB_RBPERF_CP_CACHE_FLUSH = 15,
+ RB_RBPERF_CP_ZPASS_DONE = 16,
+};
+
+enum a3xx_rbbm_perfcounter_select {
+ RBBM_ALAWYS_ON = 0,
+ RBBM_VBIF_BUSY = 1,
+ RBBM_TSE_BUSY = 2,
+ RBBM_RAS_BUSY = 3,
+ RBBM_PC_DCALL_BUSY = 4,
+ RBBM_PC_VSD_BUSY = 5,
+ RBBM_VFD_BUSY = 6,
+ RBBM_VPC_BUSY = 7,
+ RBBM_UCHE_BUSY = 8,
+ RBBM_VSC_BUSY = 9,
+ RBBM_HLSQ_BUSY = 10,
+ RBBM_ANY_RB_BUSY = 11,
+ RBBM_ANY_TEX_BUSY = 12,
+ RBBM_ANY_USP_BUSY = 13,
+ RBBM_ANY_MARB_BUSY = 14,
+ RBBM_ANY_ARB_BUSY = 15,
+ RBBM_AHB_STATUS_BUSY = 16,
+ RBBM_AHB_STATUS_STALLED = 17,
+ RBBM_AHB_STATUS_TXFR = 18,
+ RBBM_AHB_STATUS_TXFR_SPLIT = 19,
+ RBBM_AHB_STATUS_TXFR_ERROR = 20,
+ RBBM_AHB_STATUS_LONG_STALL = 21,
+ RBBM_RBBM_STATUS_MASKED = 22,
+};
+
enum a3xx_sp_perfcounter_select {
+ SP_LM_LOAD_INSTRUCTIONS = 0,
+ SP_LM_STORE_INSTRUCTIONS = 1,
+ SP_LM_ATOMICS = 2,
+ SP_UCHE_LOAD_INSTRUCTIONS = 3,
+ SP_UCHE_STORE_INSTRUCTIONS = 4,
+ SP_UCHE_ATOMICS = 5,
+ SP_VS_TEX_INSTRUCTIONS = 6,
+ SP_VS_CFLOW_INSTRUCTIONS = 7,
+ SP_VS_EFU_INSTRUCTIONS = 8,
+ SP_VS_FULL_ALU_INSTRUCTIONS = 9,
+ SP_VS_HALF_ALU_INSTRUCTIONS = 10,
+ SP_FS_TEX_INSTRUCTIONS = 11,
SP_FS_CFLOW_INSTRUCTIONS = 12,
+ SP_FS_EFU_INSTRUCTIONS = 13,
SP_FS_FULL_ALU_INSTRUCTIONS = 14,
- SP0_ICL1_MISSES = 26,
+ SP_FS_HALF_ALU_INSTRUCTIONS = 15,
+ SP_FS_BARY_INSTRUCTIONS = 16,
+ SP_VS_INSTRUCTIONS = 17,
+ SP_FS_INSTRUCTIONS = 18,
+ SP_ADDR_LOCK_COUNT = 19,
+ SP_UCHE_READ_TRANS = 20,
+ SP_UCHE_WRITE_TRANS = 21,
+ SP_EXPORT_VPC_TRANS = 22,
+ SP_EXPORT_RB_TRANS = 23,
+ SP_PIXELS_KILLED = 24,
+ SP_ICL1_REQUESTS = 25,
+ SP_ICL1_MISSES = 26,
+ SP_ICL0_REQUESTS = 27,
+ SP_ICL0_MISSES = 28,
SP_ALU_ACTIVE_CYCLES = 29,
+ SP_EFU_ACTIVE_CYCLES = 30,
+ SP_STALL_CYCLES_BY_VPC = 31,
+ SP_STALL_CYCLES_BY_TP = 32,
+ SP_STALL_CYCLES_BY_UCHE = 33,
+ SP_STALL_CYCLES_BY_RB = 34,
+ SP_ACTIVE_CYCLES_ANY = 35,
+ SP_ACTIVE_CYCLES_ALL = 36,
+};
+
+enum a3xx_tp_perfcounter_select {
+ TPL1_TPPERF_L1_REQUESTS = 0,
+ TPL1_TPPERF_TP0_L1_REQUESTS = 1,
+ TPL1_TPPERF_TP0_L1_MISSES = 2,
+ TPL1_TPPERF_TP1_L1_REQUESTS = 3,
+ TPL1_TPPERF_TP1_L1_MISSES = 4,
+ TPL1_TPPERF_TP2_L1_REQUESTS = 5,
+ TPL1_TPPERF_TP2_L1_MISSES = 6,
+ TPL1_TPPERF_TP3_L1_REQUESTS = 7,
+ TPL1_TPPERF_TP3_L1_MISSES = 8,
+ TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9,
+ TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10,
+ TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11,
+ TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12,
+ TPL1_TPPERF_BILINEAR_OPS = 13,
+ TPL1_TPPERF_QUADSQUADS_OFFSET = 14,
+ TPL1_TPPERF_QUADQUADS_SHADOW = 15,
+ TPL1_TPPERF_QUADS_ARRAY = 16,
+ TPL1_TPPERF_QUADS_PROJECTION = 17,
+ TPL1_TPPERF_QUADS_GRADIENT = 18,
+ TPL1_TPPERF_QUADS_1D2D = 19,
+ TPL1_TPPERF_QUADS_3DCUBE = 20,
+ TPL1_TPPERF_ZERO_LOD = 21,
+ TPL1_TPPERF_OUTPUT_TEXELS = 22,
+ TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23,
+ TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24,
+ TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25,
+ TPL1_TPPERF_LATENCY = 26,
+ TPL1_TPPERF_LATENCY_TRANS = 27,
+};
+
+enum a3xx_vfd_perfcounter_select {
+ VFD_PERF_UCHE_BYTE_FETCHED = 0,
+ VFD_PERF_UCHE_TRANS = 1,
+ VFD_PERF_VPC_BYPASS_COMPONENTS = 2,
+ VFD_PERF_FETCH_INSTRUCTIONS = 3,
+ VFD_PERF_DECODE_INSTRUCTIONS = 4,
+ VFD_PERF_ACTIVE_CYCLES = 5,
+ VFD_PERF_STALL_CYCLES_UCHE = 6,
+ VFD_PERF_STALL_CYCLES_HLSQ = 7,
+ VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8,
+ VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9,
+};
+
+enum a3xx_vpc_perfcounter_select {
+ VPC_PERF_SP_LM_PRIMITIVES = 0,
+ VPC_PERF_COMPONENTS_FROM_SP = 1,
+ VPC_PERF_SP_LM_COMPONENTS = 2,
+ VPC_PERF_ACTIVE_CYCLES = 3,
+ VPC_PERF_STALL_CYCLES_LM = 4,
+ VPC_PERF_STALL_CYCLES_RAS = 5,
+};
+
+enum a3xx_uche_perfcounter_select {
+ UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4,
+ UCHE_UCHEPERF_READ_REQUESTS_TP = 8,
+ UCHE_UCHEPERF_READ_REQUESTS_VFD = 9,
+ UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10,
+ UCHE_UCHEPERF_READ_REQUESTS_MARB = 11,
+ UCHE_UCHEPERF_READ_REQUESTS_SP = 12,
+ UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13,
+ UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14,
+ UCHE_UCHEPERF_TAG_CHECK_FAILS = 15,
+ UCHE_UCHEPERF_EVICTS = 16,
+ UCHE_UCHEPERF_FLUSHES = 17,
+ UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18,
+ UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19,
+ UCHE_UCHEPERF_ACTIVE_CYCLES = 20,
};
enum a3xx_rb_blend_opcode {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 811f58bbba2..8c37992e17d 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -33,6 +33,7 @@
#include "util/u_format.h"
#include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
#include "fd3_emit.h"
#include "fd3_blend.h"
@@ -888,6 +889,8 @@ fd3_emit_restore(struct fd_context *ctx)
fd_wfi(ctx, ring);
+ fd_hw_query_enable(ctx, ring);
+
ctx->needs_rb_fbd = true;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index e8df429441e..d6fd1bb583e 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -271,6 +272,545 @@ enum a4xx_tess_spacing {
EVEN_SPACING = 3,
};
+enum a4xx_ccu_perfcounter_select {
+ CCU_BUSY_CYCLES = 0,
+ CCU_RB_DEPTH_RETURN_STALL = 2,
+ CCU_RB_COLOR_RETURN_STALL = 3,
+ CCU_DEPTH_BLOCKS = 6,
+ CCU_COLOR_BLOCKS = 7,
+ CCU_DEPTH_BLOCK_HIT = 8,
+ CCU_COLOR_BLOCK_HIT = 9,
+ CCU_DEPTH_FLAG1_COUNT = 10,
+ CCU_DEPTH_FLAG2_COUNT = 11,
+ CCU_DEPTH_FLAG3_COUNT = 12,
+ CCU_DEPTH_FLAG4_COUNT = 13,
+ CCU_COLOR_FLAG1_COUNT = 14,
+ CCU_COLOR_FLAG2_COUNT = 15,
+ CCU_COLOR_FLAG3_COUNT = 16,
+ CCU_COLOR_FLAG4_COUNT = 17,
+ CCU_PARTIAL_BLOCK_READ = 18,
+};
+
+enum a4xx_cp_perfcounter_select {
+ CP_ALWAYS_COUNT = 0,
+ CP_BUSY = 1,
+ CP_PFP_IDLE = 2,
+ CP_PFP_BUSY_WORKING = 3,
+ CP_PFP_STALL_CYCLES_ANY = 4,
+ CP_PFP_STARVE_CYCLES_ANY = 5,
+ CP_PFP_STARVED_PER_LOAD_ADDR = 6,
+ CP_PFP_STALLED_PER_STORE_ADDR = 7,
+ CP_PFP_PC_PROFILE = 8,
+ CP_PFP_MATCH_PM4_PKT_PROFILE = 9,
+ CP_PFP_COND_INDIRECT_DISCARDED = 10,
+ CP_LONG_RESUMPTIONS = 11,
+ CP_RESUME_CYCLES = 12,
+ CP_RESUME_TO_BOUNDARY_CYCLES = 13,
+ CP_LONG_PREEMPTIONS = 14,
+ CP_PREEMPT_CYCLES = 15,
+ CP_PREEMPT_TO_BOUNDARY_CYCLES = 16,
+ CP_ME_FIFO_EMPTY_PFP_IDLE = 17,
+ CP_ME_FIFO_EMPTY_PFP_BUSY = 18,
+ CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19,
+ CP_ME_FIFO_FULL_ME_BUSY = 20,
+ CP_ME_FIFO_FULL_ME_NON_WORKING = 21,
+ CP_ME_WAITING_FOR_PACKETS = 22,
+ CP_ME_BUSY_WORKING = 23,
+ CP_ME_STARVE_CYCLES_ANY = 24,
+ CP_ME_STARVE_CYCLES_PER_PROFILE = 25,
+ CP_ME_STALL_CYCLES_PER_PROFILE = 26,
+ CP_ME_PC_PROFILE = 27,
+ CP_RCIU_FIFO_EMPTY = 28,
+ CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29,
+ CP_RCIU_FIFO_FULL = 30,
+ CP_RCIU_FIFO_FULL_NO_CONTEXT = 31,
+ CP_RCIU_FIFO_FULL_AHB_MASTER = 32,
+ CP_RCIU_FIFO_FULL_OTHER = 33,
+ CP_AHB_IDLE = 34,
+ CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35,
+ CP_AHB_STALL_ON_GRANT_SPLIT = 36,
+ CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37,
+ CP_AHB_BUSY_WORKING = 38,
+ CP_AHB_BUSY_STALL_ON_HRDY = 39,
+ CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40,
+};
+
+enum a4xx_gras_ras_perfcounter_select {
+ RAS_SUPER_TILES = 0,
+ RAS_8X8_TILES = 1,
+ RAS_4X4_TILES = 2,
+ RAS_BUSY_CYCLES = 3,
+ RAS_STALL_CYCLES_BY_RB = 4,
+ RAS_STALL_CYCLES_BY_VSC = 5,
+ RAS_STARVE_CYCLES_BY_TSE = 6,
+ RAS_SUPERTILE_CYCLES = 7,
+ RAS_TILE_CYCLES = 8,
+ RAS_FULLY_COVERED_SUPER_TILES = 9,
+ RAS_FULLY_COVERED_8X8_TILES = 10,
+ RAS_4X4_PRIM = 11,
+ RAS_8X4_4X8_PRIM = 12,
+ RAS_8X8_PRIM = 13,
+};
+
+enum a4xx_gras_tse_perfcounter_select {
+ TSE_INPUT_PRIM = 0,
+ TSE_INPUT_NULL_PRIM = 1,
+ TSE_TRIVAL_REJ_PRIM = 2,
+ TSE_CLIPPED_PRIM = 3,
+ TSE_NEW_PRIM = 4,
+ TSE_ZERO_AREA_PRIM = 5,
+ TSE_FACENESS_CULLED_PRIM = 6,
+ TSE_ZERO_PIXEL_PRIM = 7,
+ TSE_OUTPUT_NULL_PRIM = 8,
+ TSE_OUTPUT_VISIBLE_PRIM = 9,
+ TSE_PRE_CLIP_PRIM = 10,
+ TSE_POST_CLIP_PRIM = 11,
+ TSE_BUSY_CYCLES = 12,
+ TSE_PC_STARVE = 13,
+ TSE_RAS_STALL = 14,
+ TSE_STALL_BARYPLANE_FIFO_FULL = 15,
+ TSE_STALL_ZPLANE_FIFO_FULL = 16,
+};
+
+enum a4xx_hlsq_perfcounter_select {
+ HLSQ_SP_VS_STAGE_CONSTANT = 0,
+ HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1,
+ HLSQ_SP_FS_STAGE_CONSTANT = 2,
+ HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3,
+ HLSQ_TP_STATE = 4,
+ HLSQ_QUADS = 5,
+ HLSQ_PIXELS = 6,
+ HLSQ_VERTICES = 7,
+ HLSQ_SP_VS_STAGE_DATA_BYTES = 13,
+ HLSQ_SP_FS_STAGE_DATA_BYTES = 14,
+ HLSQ_BUSY_CYCLES = 15,
+ HLSQ_STALL_CYCLES_SP_STATE = 16,
+ HLSQ_STALL_CYCLES_SP_VS_STAGE = 17,
+ HLSQ_STALL_CYCLES_SP_FS_STAGE = 18,
+ HLSQ_STALL_CYCLES_UCHE = 19,
+ HLSQ_RBBM_LOAD_CYCLES = 20,
+ HLSQ_DI_TO_VS_START_SP = 21,
+ HLSQ_DI_TO_FS_START_SP = 22,
+ HLSQ_VS_STAGE_START_TO_DONE_SP = 23,
+ HLSQ_FS_STAGE_START_TO_DONE_SP = 24,
+ HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25,
+ HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26,
+ HLSQ_UCHE_LATENCY_CYCLES = 27,
+ HLSQ_UCHE_LATENCY_COUNT = 28,
+ HLSQ_STARVE_CYCLES_VFD = 29,
+};
+
+enum a4xx_pc_perfcounter_select {
+ PC_VIS_STREAMS_LOADED = 0,
+ PC_VPC_PRIMITIVES = 2,
+ PC_DEAD_PRIM = 3,
+ PC_LIVE_PRIM = 4,
+ PC_DEAD_DRAWCALLS = 5,
+ PC_LIVE_DRAWCALLS = 6,
+ PC_VERTEX_MISSES = 7,
+ PC_STALL_CYCLES_VFD = 9,
+ PC_STALL_CYCLES_TSE = 10,
+ PC_STALL_CYCLES_UCHE = 11,
+ PC_WORKING_CYCLES = 12,
+ PC_IA_VERTICES = 13,
+ PC_GS_PRIMITIVES = 14,
+ PC_HS_INVOCATIONS = 15,
+ PC_DS_INVOCATIONS = 16,
+ PC_DS_PRIMITIVES = 17,
+ PC_STARVE_CYCLES_FOR_INDEX = 20,
+ PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21,
+ PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22,
+ PC_STALL_CYCLES_TESS = 23,
+ PC_STARVE_CYCLES_FOR_POSITION = 24,
+ PC_MODE0_DRAWCALL = 25,
+ PC_MODE1_DRAWCALL = 26,
+ PC_MODE2_DRAWCALL = 27,
+ PC_MODE3_DRAWCALL = 28,
+ PC_MODE4_DRAWCALL = 29,
+ PC_PREDICATED_DEAD_DRAWCALL = 30,
+ PC_STALL_CYCLES_BY_TSE_ONLY = 31,
+ PC_STALL_CYCLES_BY_VPC_ONLY = 32,
+ PC_VPC_POS_DATA_TRANSACTION = 33,
+ PC_BUSY_CYCLES = 34,
+ PC_STARVE_CYCLES_DI = 35,
+ PC_STALL_CYCLES_VPC = 36,
+ TESS_WORKING_CYCLES = 37,
+ TESS_NUM_CYCLES_SETUP_WORKING = 38,
+ TESS_NUM_CYCLES_PTGEN_WORKING = 39,
+ TESS_NUM_CYCLES_CONNGEN_WORKING = 40,
+ TESS_BUSY_CYCLES = 41,
+ TESS_STARVE_CYCLES_PC = 42,
+ TESS_STALL_CYCLES_PC = 43,
+};
+
+enum a4xx_pwr_perfcounter_select {
+ PWR_CORE_CLOCK_CYCLES = 0,
+ PWR_BUSY_CLOCK_CYCLES = 1,
+};
+
+enum a4xx_rb_perfcounter_select {
+ RB_BUSY_CYCLES = 0,
+ RB_BUSY_CYCLES_BINNING = 1,
+ RB_BUSY_CYCLES_RENDERING = 2,
+ RB_BUSY_CYCLES_RESOLVE = 3,
+ RB_STARVE_CYCLES_BY_SP = 4,
+ RB_STARVE_CYCLES_BY_RAS = 5,
+ RB_STARVE_CYCLES_BY_MARB = 6,
+ RB_STALL_CYCLES_BY_MARB = 7,
+ RB_STALL_CYCLES_BY_HLSQ = 8,
+ RB_RB_RB_MARB_DATA = 9,
+ RB_SP_RB_QUAD = 10,
+ RB_RAS_RB_Z_QUADS = 11,
+ RB_GMEM_CH0_READ = 12,
+ RB_GMEM_CH1_READ = 13,
+ RB_GMEM_CH0_WRITE = 14,
+ RB_GMEM_CH1_WRITE = 15,
+ RB_CP_CONTEXT_DONE = 16,
+ RB_CP_CACHE_FLUSH = 17,
+ RB_CP_ZPASS_DONE = 18,
+ RB_STALL_FIFO0_FULL = 19,
+ RB_STALL_FIFO1_FULL = 20,
+ RB_STALL_FIFO2_FULL = 21,
+ RB_STALL_FIFO3_FULL = 22,
+ RB_RB_HLSQ_TRANSACTIONS = 23,
+ RB_Z_READ = 24,
+ RB_Z_WRITE = 25,
+ RB_C_READ = 26,
+ RB_C_WRITE = 27,
+ RB_C_READ_LATENCY = 28,
+ RB_Z_READ_LATENCY = 29,
+ RB_STALL_BY_UCHE = 30,
+ RB_MARB_UCHE_TRANSACTIONS = 31,
+ RB_CACHE_STALL_MISS = 32,
+ RB_CACHE_STALL_FIFO_FULL = 33,
+ RB_8BIT_BLENDER_UNITS_ACTIVE = 34,
+ RB_16BIT_BLENDER_UNITS_ACTIVE = 35,
+ RB_SAMPLER_UNITS_ACTIVE = 36,
+ RB_TOTAL_PASS = 38,
+ RB_Z_PASS = 39,
+ RB_Z_FAIL = 40,
+ RB_S_FAIL = 41,
+ RB_POWER0 = 42,
+ RB_POWER1 = 43,
+ RB_POWER2 = 44,
+ RB_POWER3 = 45,
+ RB_POWER4 = 46,
+ RB_POWER5 = 47,
+ RB_POWER6 = 48,
+ RB_POWER7 = 49,
+};
+
+enum a4xx_rbbm_perfcounter_select {
+ RBBM_ALWAYS_ON = 0,
+ RBBM_VBIF_BUSY = 1,
+ RBBM_TSE_BUSY = 2,
+ RBBM_RAS_BUSY = 3,
+ RBBM_PC_DCALL_BUSY = 4,
+ RBBM_PC_VSD_BUSY = 5,
+ RBBM_VFD_BUSY = 6,
+ RBBM_VPC_BUSY = 7,
+ RBBM_UCHE_BUSY = 8,
+ RBBM_VSC_BUSY = 9,
+ RBBM_HLSQ_BUSY = 10,
+ RBBM_ANY_RB_BUSY = 11,
+ RBBM_ANY_TPL1_BUSY = 12,
+ RBBM_ANY_SP_BUSY = 13,
+ RBBM_ANY_MARB_BUSY = 14,
+ RBBM_ANY_ARB_BUSY = 15,
+ RBBM_AHB_STATUS_BUSY = 16,
+ RBBM_AHB_STATUS_STALLED = 17,
+ RBBM_AHB_STATUS_TXFR = 18,
+ RBBM_AHB_STATUS_TXFR_SPLIT = 19,
+ RBBM_AHB_STATUS_TXFR_ERROR = 20,
+ RBBM_AHB_STATUS_LONG_STALL = 21,
+ RBBM_STATUS_MASKED = 22,
+ RBBM_CP_BUSY_GFX_CORE_IDLE = 23,
+ RBBM_TESS_BUSY = 24,
+ RBBM_COM_BUSY = 25,
+ RBBM_DCOM_BUSY = 32,
+ RBBM_ANY_CCU_BUSY = 33,
+ RBBM_DPM_BUSY = 34,
+};
+
+enum a4xx_sp_perfcounter_select {
+ SP_LM_LOAD_INSTRUCTIONS = 0,
+ SP_LM_STORE_INSTRUCTIONS = 1,
+ SP_LM_ATOMICS = 2,
+ SP_GM_LOAD_INSTRUCTIONS = 3,
+ SP_GM_STORE_INSTRUCTIONS = 4,
+ SP_GM_ATOMICS = 5,
+ SP_VS_STAGE_TEX_INSTRUCTIONS = 6,
+ SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7,
+ SP_VS_STAGE_EFU_INSTRUCTIONS = 8,
+ SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9,
+ SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10,
+ SP_FS_STAGE_TEX_INSTRUCTIONS = 11,
+ SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12,
+ SP_FS_STAGE_EFU_INSTRUCTIONS = 13,
+ SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14,
+ SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15,
+ SP_VS_INSTRUCTIONS = 17,
+ SP_FS_INSTRUCTIONS = 18,
+ SP_ADDR_LOCK_COUNT = 19,
+ SP_UCHE_READ_TRANS = 20,
+ SP_UCHE_WRITE_TRANS = 21,
+ SP_EXPORT_VPC_TRANS = 22,
+ SP_EXPORT_RB_TRANS = 23,
+ SP_PIXELS_KILLED = 24,
+ SP_ICL1_REQUESTS = 25,
+ SP_ICL1_MISSES = 26,
+ SP_ICL0_REQUESTS = 27,
+ SP_ICL0_MISSES = 28,
+ SP_ALU_WORKING_CYCLES = 29,
+ SP_EFU_WORKING_CYCLES = 30,
+ SP_STALL_CYCLES_BY_VPC = 31,
+ SP_STALL_CYCLES_BY_TP = 32,
+ SP_STALL_CYCLES_BY_UCHE = 33,
+ SP_STALL_CYCLES_BY_RB = 34,
+ SP_BUSY_CYCLES = 35,
+ SP_HS_INSTRUCTIONS = 36,
+ SP_DS_INSTRUCTIONS = 37,
+ SP_GS_INSTRUCTIONS = 38,
+ SP_CS_INSTRUCTIONS = 39,
+ SP_SCHEDULER_NON_WORKING = 40,
+ SP_WAVE_CONTEXTS = 41,
+ SP_WAVE_CONTEXT_CYCLES = 42,
+ SP_POWER0 = 43,
+ SP_POWER1 = 44,
+ SP_POWER2 = 45,
+ SP_POWER3 = 46,
+ SP_POWER4 = 47,
+ SP_POWER5 = 48,
+ SP_POWER6 = 49,
+ SP_POWER7 = 50,
+ SP_POWER8 = 51,
+ SP_POWER9 = 52,
+ SP_POWER10 = 53,
+ SP_POWER11 = 54,
+ SP_POWER12 = 55,
+ SP_POWER13 = 56,
+ SP_POWER14 = 57,
+ SP_POWER15 = 58,
+};
+
+enum a4xx_tp_perfcounter_select {
+ TP_L1_REQUESTS = 0,
+ TP_L1_MISSES = 1,
+ TP_QUADS_OFFSET = 8,
+ TP_QUAD_SHADOW = 9,
+ TP_QUADS_ARRAY = 10,
+ TP_QUADS_GRADIENT = 11,
+ TP_QUADS_1D2D = 12,
+ TP_QUADS_3DCUBE = 13,
+ TP_BUSY_CYCLES = 16,
+ TP_STALL_CYCLES_BY_ARB = 17,
+ TP_STATE_CACHE_REQUESTS = 20,
+ TP_STATE_CACHE_MISSES = 21,
+ TP_POWER0 = 22,
+ TP_POWER1 = 23,
+ TP_POWER2 = 24,
+ TP_POWER3 = 25,
+ TP_POWER4 = 26,
+ TP_POWER5 = 27,
+ TP_POWER6 = 28,
+ TP_POWER7 = 29,
+};
+
+enum a4xx_uche_perfcounter_select {
+ UCHE_VBIF_READ_BEATS_TP = 0,
+ UCHE_VBIF_READ_BEATS_VFD = 1,
+ UCHE_VBIF_READ_BEATS_HLSQ = 2,
+ UCHE_VBIF_READ_BEATS_MARB = 3,
+ UCHE_VBIF_READ_BEATS_SP = 4,
+ UCHE_READ_REQUESTS_TP = 5,
+ UCHE_READ_REQUESTS_VFD = 6,
+ UCHE_READ_REQUESTS_HLSQ = 7,
+ UCHE_READ_REQUESTS_MARB = 8,
+ UCHE_READ_REQUESTS_SP = 9,
+ UCHE_WRITE_REQUESTS_MARB = 10,
+ UCHE_WRITE_REQUESTS_SP = 11,
+ UCHE_TAG_CHECK_FAILS = 12,
+ UCHE_EVICTS = 13,
+ UCHE_FLUSHES = 14,
+ UCHE_VBIF_LATENCY_CYCLES = 15,
+ UCHE_VBIF_LATENCY_SAMPLES = 16,
+ UCHE_BUSY_CYCLES = 17,
+ UCHE_VBIF_READ_BEATS_PC = 18,
+ UCHE_READ_REQUESTS_PC = 19,
+ UCHE_WRITE_REQUESTS_VPC = 20,
+ UCHE_STALL_BY_VBIF = 21,
+ UCHE_WRITE_REQUESTS_VSC = 22,
+ UCHE_POWER0 = 23,
+ UCHE_POWER1 = 24,
+ UCHE_POWER2 = 25,
+ UCHE_POWER3 = 26,
+ UCHE_POWER4 = 27,
+ UCHE_POWER5 = 28,
+ UCHE_POWER6 = 29,
+ UCHE_POWER7 = 30,
+};
+
+enum a4xx_vbif_perfcounter_select {
+ AXI_READ_REQUESTS_ID_0 = 0,
+ AXI_READ_REQUESTS_ID_1 = 1,
+ AXI_READ_REQUESTS_ID_2 = 2,
+ AXI_READ_REQUESTS_ID_3 = 3,
+ AXI_READ_REQUESTS_ID_4 = 4,
+ AXI_READ_REQUESTS_ID_5 = 5,
+ AXI_READ_REQUESTS_ID_6 = 6,
+ AXI_READ_REQUESTS_ID_7 = 7,
+ AXI_READ_REQUESTS_ID_8 = 8,
+ AXI_READ_REQUESTS_ID_9 = 9,
+ AXI_READ_REQUESTS_ID_10 = 10,
+ AXI_READ_REQUESTS_ID_11 = 11,
+ AXI_READ_REQUESTS_ID_12 = 12,
+ AXI_READ_REQUESTS_ID_13 = 13,
+ AXI_READ_REQUESTS_ID_14 = 14,
+ AXI_READ_REQUESTS_ID_15 = 15,
+ AXI0_READ_REQUESTS_TOTAL = 16,
+ AXI1_READ_REQUESTS_TOTAL = 17,
+ AXI2_READ_REQUESTS_TOTAL = 18,
+ AXI3_READ_REQUESTS_TOTAL = 19,
+ AXI_READ_REQUESTS_TOTAL = 20,
+ AXI_WRITE_REQUESTS_ID_0 = 21,
+ AXI_WRITE_REQUESTS_ID_1 = 22,
+ AXI_WRITE_REQUESTS_ID_2 = 23,
+ AXI_WRITE_REQUESTS_ID_3 = 24,
+ AXI_WRITE_REQUESTS_ID_4 = 25,
+ AXI_WRITE_REQUESTS_ID_5 = 26,
+ AXI_WRITE_REQUESTS_ID_6 = 27,
+ AXI_WRITE_REQUESTS_ID_7 = 28,
+ AXI_WRITE_REQUESTS_ID_8 = 29,
+ AXI_WRITE_REQUESTS_ID_9 = 30,
+ AXI_WRITE_REQUESTS_ID_10 = 31,
+ AXI_WRITE_REQUESTS_ID_11 = 32,
+ AXI_WRITE_REQUESTS_ID_12 = 33,
+ AXI_WRITE_REQUESTS_ID_13 = 34,
+ AXI_WRITE_REQUESTS_ID_14 = 35,
+ AXI_WRITE_REQUESTS_ID_15 = 36,
+ AXI0_WRITE_REQUESTS_TOTAL = 37,
+ AXI1_WRITE_REQUESTS_TOTAL = 38,
+ AXI2_WRITE_REQUESTS_TOTAL = 39,
+ AXI3_WRITE_REQUESTS_TOTAL = 40,
+ AXI_WRITE_REQUESTS_TOTAL = 41,
+ AXI_TOTAL_REQUESTS = 42,
+ AXI_READ_DATA_BEATS_ID_0 = 43,
+ AXI_READ_DATA_BEATS_ID_1 = 44,
+ AXI_READ_DATA_BEATS_ID_2 = 45,
+ AXI_READ_DATA_BEATS_ID_3 = 46,
+ AXI_READ_DATA_BEATS_ID_4 = 47,
+ AXI_READ_DATA_BEATS_ID_5 = 48,
+ AXI_READ_DATA_BEATS_ID_6 = 49,
+ AXI_READ_DATA_BEATS_ID_7 = 50,
+ AXI_READ_DATA_BEATS_ID_8 = 51,
+ AXI_READ_DATA_BEATS_ID_9 = 52,
+ AXI_READ_DATA_BEATS_ID_10 = 53,
+ AXI_READ_DATA_BEATS_ID_11 = 54,
+ AXI_READ_DATA_BEATS_ID_12 = 55,
+ AXI_READ_DATA_BEATS_ID_13 = 56,
+ AXI_READ_DATA_BEATS_ID_14 = 57,
+ AXI_READ_DATA_BEATS_ID_15 = 58,
+ AXI0_READ_DATA_BEATS_TOTAL = 59,
+ AXI1_READ_DATA_BEATS_TOTAL = 60,
+ AXI2_READ_DATA_BEATS_TOTAL = 61,
+ AXI3_READ_DATA_BEATS_TOTAL = 62,
+ AXI_READ_DATA_BEATS_TOTAL = 63,
+ AXI_WRITE_DATA_BEATS_ID_0 = 64,
+ AXI_WRITE_DATA_BEATS_ID_1 = 65,
+ AXI_WRITE_DATA_BEATS_ID_2 = 66,
+ AXI_WRITE_DATA_BEATS_ID_3 = 67,
+ AXI_WRITE_DATA_BEATS_ID_4 = 68,
+ AXI_WRITE_DATA_BEATS_ID_5 = 69,
+ AXI_WRITE_DATA_BEATS_ID_6 = 70,
+ AXI_WRITE_DATA_BEATS_ID_7 = 71,
+ AXI_WRITE_DATA_BEATS_ID_8 = 72,
+ AXI_WRITE_DATA_BEATS_ID_9 = 73,
+ AXI_WRITE_DATA_BEATS_ID_10 = 74,
+ AXI_WRITE_DATA_BEATS_ID_11 = 75,
+ AXI_WRITE_DATA_BEATS_ID_12 = 76,
+ AXI_WRITE_DATA_BEATS_ID_13 = 77,
+ AXI_WRITE_DATA_BEATS_ID_14 = 78,
+ AXI_WRITE_DATA_BEATS_ID_15 = 79,
+ AXI0_WRITE_DATA_BEATS_TOTAL = 80,
+ AXI1_WRITE_DATA_BEATS_TOTAL = 81,
+ AXI2_WRITE_DATA_BEATS_TOTAL = 82,
+ AXI3_WRITE_DATA_BEATS_TOTAL = 83,
+ AXI_WRITE_DATA_BEATS_TOTAL = 84,
+ AXI_DATA_BEATS_TOTAL = 85,
+ CYCLES_HELD_OFF_ID_0 = 86,
+ CYCLES_HELD_OFF_ID_1 = 87,
+ CYCLES_HELD_OFF_ID_2 = 88,
+ CYCLES_HELD_OFF_ID_3 = 89,
+ CYCLES_HELD_OFF_ID_4 = 90,
+ CYCLES_HELD_OFF_ID_5 = 91,
+ CYCLES_HELD_OFF_ID_6 = 92,
+ CYCLES_HELD_OFF_ID_7 = 93,
+ CYCLES_HELD_OFF_ID_8 = 94,
+ CYCLES_HELD_OFF_ID_9 = 95,
+ CYCLES_HELD_OFF_ID_10 = 96,
+ CYCLES_HELD_OFF_ID_11 = 97,
+ CYCLES_HELD_OFF_ID_12 = 98,
+ CYCLES_HELD_OFF_ID_13 = 99,
+ CYCLES_HELD_OFF_ID_14 = 100,
+ CYCLES_HELD_OFF_ID_15 = 101,
+ AXI_READ_REQUEST_HELD_OFF = 102,
+ AXI_WRITE_REQUEST_HELD_OFF = 103,
+ AXI_REQUEST_HELD_OFF = 104,
+ AXI_WRITE_DATA_HELD_OFF = 105,
+ OCMEM_AXI_READ_REQUEST_HELD_OFF = 106,
+ OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107,
+ OCMEM_AXI_REQUEST_HELD_OFF = 108,
+ OCMEM_AXI_WRITE_DATA_HELD_OFF = 109,
+ ELAPSED_CYCLES_DDR = 110,
+ ELAPSED_CYCLES_OCMEM = 111,
+};
+
+enum a4xx_vfd_perfcounter_select {
+ VFD_UCHE_BYTE_FETCHED = 0,
+ VFD_UCHE_TRANS = 1,
+ VFD_FETCH_INSTRUCTIONS = 3,
+ VFD_BUSY_CYCLES = 5,
+ VFD_STALL_CYCLES_UCHE = 6,
+ VFD_STALL_CYCLES_HLSQ = 7,
+ VFD_STALL_CYCLES_VPC_BYPASS = 8,
+ VFD_STALL_CYCLES_VPC_ALLOC = 9,
+ VFD_MODE_0_FIBERS = 13,
+ VFD_MODE_1_FIBERS = 14,
+ VFD_MODE_2_FIBERS = 15,
+ VFD_MODE_3_FIBERS = 16,
+ VFD_MODE_4_FIBERS = 17,
+ VFD_BFIFO_STALL = 18,
+ VFD_NUM_VERTICES_TOTAL = 19,
+ VFD_PACKER_FULL = 20,
+ VFD_UCHE_REQUEST_FIFO_FULL = 21,
+ VFD_STARVE_CYCLES_PC = 22,
+ VFD_STARVE_CYCLES_UCHE = 23,
+};
+
+enum a4xx_vpc_perfcounter_select {
+ VPC_SP_LM_COMPONENTS = 2,
+ VPC_SP0_LM_BYTES = 3,
+ VPC_SP1_LM_BYTES = 4,
+ VPC_SP2_LM_BYTES = 5,
+ VPC_SP3_LM_BYTES = 6,
+ VPC_WORKING_CYCLES = 7,
+ VPC_STALL_CYCLES_LM = 8,
+ VPC_STARVE_CYCLES_RAS = 9,
+ VPC_STREAMOUT_CYCLES = 10,
+ VPC_UCHE_TRANSACTIONS = 12,
+ VPC_STALL_CYCLES_UCHE = 13,
+ VPC_BUSY_CYCLES = 14,
+ VPC_STARVE_CYCLES_SP = 15,
+};
+
+enum a4xx_vsc_perfcounter_select {
+ VSC_BUSY_CYCLES = 0,
+ VSC_WORKING_CYCLES = 1,
+ VSC_STALL_CYCLES_UCHE = 2,
+ VSC_STARVE_CYCLES_RAS = 3,
+ VSC_EOT_NUM = 4,
+};
+
enum a4xx_tex_filter {
A4XX_TEX_NEAREST = 0,
A4XX_TEX_LINEAR = 1,
@@ -357,6 +897,12 @@ static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val)
#define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_0 0x00000ccf
+
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_1 0x00000cd0
+
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_2 0x00000cd1
+
#define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2
#define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0
@@ -1070,6 +1616,380 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x
#define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c
+#define REG_A4XX_RBBM_PERFCTR_CP_0_HI 0x0000009d
+
+#define REG_A4XX_RBBM_PERFCTR_CP_1_LO 0x0000009e
+
+#define REG_A4XX_RBBM_PERFCTR_CP_1_HI 0x0000009f
+
+#define REG_A4XX_RBBM_PERFCTR_CP_2_LO 0x000000a0
+
+#define REG_A4XX_RBBM_PERFCTR_CP_2_HI 0x000000a1
+
+#define REG_A4XX_RBBM_PERFCTR_CP_3_LO 0x000000a2
+
+#define REG_A4XX_RBBM_PERFCTR_CP_3_HI 0x000000a3
+
+#define REG_A4XX_RBBM_PERFCTR_CP_4_LO 0x000000a4
+
+#define REG_A4XX_RBBM_PERFCTR_CP_4_HI 0x000000a5
+
+#define REG_A4XX_RBBM_PERFCTR_CP_5_LO 0x000000a6
+
+#define REG_A4XX_RBBM_PERFCTR_CP_5_HI 0x000000a7
+
+#define REG_A4XX_RBBM_PERFCTR_CP_6_LO 0x000000a8
+
+#define REG_A4XX_RBBM_PERFCTR_CP_6_HI 0x000000a9
+
+#define REG_A4XX_RBBM_PERFCTR_CP_7_LO 0x000000aa
+
+#define REG_A4XX_RBBM_PERFCTR_CP_7_HI 0x000000ab
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_0_LO 0x000000ac
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI 0x000000ad
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO 0x000000ae
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI 0x000000af
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO 0x000000b0
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI 0x000000b1
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO 0x000000b2
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI 0x000000b3
+
+#define REG_A4XX_RBBM_PERFCTR_PC_0_LO 0x000000b4
+
+#define REG_A4XX_RBBM_PERFCTR_PC_0_HI 0x000000b5
+
+#define REG_A4XX_RBBM_PERFCTR_PC_1_LO 0x000000b6
+
+#define REG_A4XX_RBBM_PERFCTR_PC_1_HI 0x000000b7
+
+#define REG_A4XX_RBBM_PERFCTR_PC_2_LO 0x000000b8
+
+#define REG_A4XX_RBBM_PERFCTR_PC_2_HI 0x000000b9
+
+#define REG_A4XX_RBBM_PERFCTR_PC_3_LO 0x000000ba
+
+#define REG_A4XX_RBBM_PERFCTR_PC_3_HI 0x000000bb
+
+#define REG_A4XX_RBBM_PERFCTR_PC_4_LO 0x000000bc
+
+#define REG_A4XX_RBBM_PERFCTR_PC_4_HI 0x000000bd
+
+#define REG_A4XX_RBBM_PERFCTR_PC_5_LO 0x000000be
+
+#define REG_A4XX_RBBM_PERFCTR_PC_5_HI 0x000000bf
+
+#define REG_A4XX_RBBM_PERFCTR_PC_6_LO 0x000000c0
+
+#define REG_A4XX_RBBM_PERFCTR_PC_6_HI 0x000000c1
+
+#define REG_A4XX_RBBM_PERFCTR_PC_7_LO 0x000000c2
+
+#define REG_A4XX_RBBM_PERFCTR_PC_7_HI 0x000000c3
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO 0x000000c4
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI 0x000000c5
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO 0x000000c6
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI 0x000000c7
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO 0x000000c8
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI 0x000000c9
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO 0x000000ca
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI 0x000000cb
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO 0x000000cc
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI 0x000000cd
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO 0x000000ce
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_5_HI 0x000000cf
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO 0x000000d0
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI 0x000000d1
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO 0x000000d2
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI 0x000000d3
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000d4
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000d5
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000d6
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000d7
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000d8
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000d9
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000da
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000db
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000dc
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000dd
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000de
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000df
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO 0x000000e0
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI 0x000000e1
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO 0x000000e2
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI 0x000000e3
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO 0x000000e4
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI 0x000000e5
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO 0x000000e6
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_1_HI 0x000000e7
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO 0x000000e8
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI 0x000000e9
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO 0x000000ea
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI 0x000000eb
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO 0x000000ec
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI 0x000000ed
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO 0x000000ee
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI 0x000000ef
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO 0x000000f0
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI 0x000000f1
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO 0x000000f2
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI 0x000000f3
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_0_LO 0x000000f4
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI 0x000000f5
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO 0x000000f6
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_1_HI 0x000000f7
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO 0x000000f8
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI 0x000000f9
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO 0x000000fa
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI 0x000000fb
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO 0x000000fc
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI 0x000000fd
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO 0x000000fe
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI 0x000000ff
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO 0x00000100
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI 0x00000101
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO 0x00000102
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI 0x00000103
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO 0x00000104
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI 0x00000105
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO 0x00000106
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI 0x00000107
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO 0x00000108
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI 0x00000109
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO 0x0000010a
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI 0x0000010b
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_4_LO 0x0000010c
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI 0x0000010d
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO 0x0000010e
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI 0x0000010f
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO 0x00000110
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI 0x00000111
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO 0x00000112
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI 0x00000113
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115
+
+#define REG_A4XX_RBBM_PERFCTR_TP_1_LO 0x00000116
+
+#define REG_A4XX_RBBM_PERFCTR_TP_1_HI 0x00000117
+
+#define REG_A4XX_RBBM_PERFCTR_TP_2_LO 0x00000118
+
+#define REG_A4XX_RBBM_PERFCTR_TP_2_HI 0x00000119
+
+#define REG_A4XX_RBBM_PERFCTR_TP_3_LO 0x0000011a
+
+#define REG_A4XX_RBBM_PERFCTR_TP_3_HI 0x0000011b
+
+#define REG_A4XX_RBBM_PERFCTR_TP_4_LO 0x0000011c
+
+#define REG_A4XX_RBBM_PERFCTR_TP_4_HI 0x0000011d
+
+#define REG_A4XX_RBBM_PERFCTR_TP_5_LO 0x0000011e
+
+#define REG_A4XX_RBBM_PERFCTR_TP_5_HI 0x0000011f
+
+#define REG_A4XX_RBBM_PERFCTR_TP_6_LO 0x00000120
+
+#define REG_A4XX_RBBM_PERFCTR_TP_6_HI 0x00000121
+
+#define REG_A4XX_RBBM_PERFCTR_TP_7_LO 0x00000122
+
+#define REG_A4XX_RBBM_PERFCTR_TP_7_HI 0x00000123
+
+#define REG_A4XX_RBBM_PERFCTR_SP_0_LO 0x00000124
+
+#define REG_A4XX_RBBM_PERFCTR_SP_0_HI 0x00000125
+
+#define REG_A4XX_RBBM_PERFCTR_SP_1_LO 0x00000126
+
+#define REG_A4XX_RBBM_PERFCTR_SP_1_HI 0x00000127
+
+#define REG_A4XX_RBBM_PERFCTR_SP_2_LO 0x00000128
+
+#define REG_A4XX_RBBM_PERFCTR_SP_2_HI 0x00000129
+
+#define REG_A4XX_RBBM_PERFCTR_SP_3_LO 0x0000012a
+
+#define REG_A4XX_RBBM_PERFCTR_SP_3_HI 0x0000012b
+
+#define REG_A4XX_RBBM_PERFCTR_SP_4_LO 0x0000012c
+
+#define REG_A4XX_RBBM_PERFCTR_SP_4_HI 0x0000012d
+
+#define REG_A4XX_RBBM_PERFCTR_SP_5_LO 0x0000012e
+
+#define REG_A4XX_RBBM_PERFCTR_SP_5_HI 0x0000012f
+
+#define REG_A4XX_RBBM_PERFCTR_SP_6_LO 0x00000130
+
+#define REG_A4XX_RBBM_PERFCTR_SP_6_HI 0x00000131
+
+#define REG_A4XX_RBBM_PERFCTR_SP_7_LO 0x00000132
+
+#define REG_A4XX_RBBM_PERFCTR_SP_7_HI 0x00000133
+
+#define REG_A4XX_RBBM_PERFCTR_SP_8_LO 0x00000134
+
+#define REG_A4XX_RBBM_PERFCTR_SP_8_HI 0x00000135
+
+#define REG_A4XX_RBBM_PERFCTR_SP_9_LO 0x00000136
+
+#define REG_A4XX_RBBM_PERFCTR_SP_9_HI 0x00000137
+
+#define REG_A4XX_RBBM_PERFCTR_SP_10_LO 0x00000138
+
+#define REG_A4XX_RBBM_PERFCTR_SP_10_HI 0x00000139
+
+#define REG_A4XX_RBBM_PERFCTR_SP_11_LO 0x0000013a
+
+#define REG_A4XX_RBBM_PERFCTR_SP_11_HI 0x0000013b
+
+#define REG_A4XX_RBBM_PERFCTR_RB_0_LO 0x0000013c
+
+#define REG_A4XX_RBBM_PERFCTR_RB_0_HI 0x0000013d
+
+#define REG_A4XX_RBBM_PERFCTR_RB_1_LO 0x0000013e
+
+#define REG_A4XX_RBBM_PERFCTR_RB_1_HI 0x0000013f
+
+#define REG_A4XX_RBBM_PERFCTR_RB_2_LO 0x00000140
+
+#define REG_A4XX_RBBM_PERFCTR_RB_2_HI 0x00000141
+
+#define REG_A4XX_RBBM_PERFCTR_RB_3_LO 0x00000142
+
+#define REG_A4XX_RBBM_PERFCTR_RB_3_HI 0x00000143
+
+#define REG_A4XX_RBBM_PERFCTR_RB_4_LO 0x00000144
+
+#define REG_A4XX_RBBM_PERFCTR_RB_4_HI 0x00000145
+
+#define REG_A4XX_RBBM_PERFCTR_RB_5_LO 0x00000146
+
+#define REG_A4XX_RBBM_PERFCTR_RB_5_HI 0x00000147
+
+#define REG_A4XX_RBBM_PERFCTR_RB_6_LO 0x00000148
+
+#define REG_A4XX_RBBM_PERFCTR_RB_6_HI 0x00000149
+
+#define REG_A4XX_RBBM_PERFCTR_RB_7_LO 0x0000014a
+
+#define REG_A4XX_RBBM_PERFCTR_RB_7_HI 0x0000014b
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO 0x0000014c
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI 0x0000014d
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO 0x0000014e
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI 0x0000014f
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO 0x00000166
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI 0x00000167
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI 0x00000169
+
+#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO 0x0000016e
+
+#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI 0x0000016f
+
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; }
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; }
@@ -1136,6 +2056,14 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0)
#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000176
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000177
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000178
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x00000179
+
#define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a
#define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d
@@ -1272,6 +2200,20 @@ static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240
#define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500
+#define REG_A4XX_CP_PERFCTR_CP_SEL_1 0x00000501
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_2 0x00000502
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_3 0x00000503
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_4 0x00000504
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_5 0x00000505
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_6 0x00000506
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_7 0x00000507
+
#define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b
static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; }
@@ -1802,6 +2744,12 @@ static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A4XX_VPC_DEBUG_ECO_CONTROL 0x00000e64
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0 0x00000e65
+
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1 0x00000e66
+
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2 0x00000e67
+
#define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68
#define REG_A4XX_VPC_ATTR 0x00002140
@@ -1914,6 +2862,20 @@ static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0
#define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0 0x00000e43
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1 0x00000e44
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2 0x00000e45
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3 0x00000e46
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4 0x00000e47
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5 0x00000e48
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6 0x00000e49
+
#define REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a
#define REG_A4XX_VGT_CL_INITIATOR 0x000021d0
@@ -2070,6 +3032,20 @@ static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
#define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0 0x00000f04
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1 0x00000f05
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2 0x00000f06
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3 0x00000f07
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_4 0x00000f08
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5 0x00000f09
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6 0x00000f0a
+
#define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b
#define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380
@@ -2124,8 +3100,20 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88
+#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c89
+
+#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c8a
+
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c8c
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c8d
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c8e
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c8f
+
#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000
#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000
#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000
@@ -2391,6 +3379,20 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
#define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e8e
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e8f
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e90
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e91
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e92
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e93
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e94
+
#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95
#define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00
@@ -2401,6 +3403,22 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
#define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e06
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e07
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e08
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e09
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e0a
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e0b
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e0c
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e0d
+
#define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0
#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010
#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4
@@ -2655,6 +3673,18 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val)
#define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10
+#define REG_A4XX_PC_PERFCTR_PC_SEL_1 0x00000d11
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_2 0x00000d12
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_3 0x00000d13
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_4 0x00000d14
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_5 0x00000d15
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_6 0x00000d16
+
#define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17
#define REG_A4XX_PC_BIN_BASE 0x000021c0
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
index 074c5a752bf..0c1027d5804 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
@@ -49,6 +49,8 @@ struct fd4_context {
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
+ *
+ * (upper area used as scratch bo.. see fd4_query)
*/
struct fd_bo *vsc_size_mem;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 4a3f1da30ed..72154bf286a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -33,6 +33,7 @@
#include "util/u_format.h"
#include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
#include "fd4_emit.h"
#include "fd4_blend.h"
@@ -882,6 +883,8 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, 0x0);
+ fd_hw_query_enable(ctx, ring);
+
ctx->needs_rb_fbd = true;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
index 4f69e0c1694..14a809431ac 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_query.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
@@ -31,6 +31,7 @@
#include "freedreno_util.h"
#include "fd4_query.h"
+#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_format.h"
@@ -81,7 +82,12 @@ static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start,
const struct fd_rb_samp_ctrs *end)
{
- return end->ctr[0] - start->ctr[0];
+ uint64_t n = 0;
+
+ for (unsigned i = 0; i < 16; i += 4)
+ n += end->ctr[i] - start->ctr[i];
+
+ return n / 2;
}
static void
@@ -102,6 +108,127 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx,
result->b |= (n > 0);
}
+/*
+ * Time Elapsed Query:
+ *
+ * Note: we could in theory support timestamp queries, but they
+ * won't give sensible results for tilers.
+ */
+
+static void
+time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ /* Right now, the assignment of countable to counter register is
+ * just hard coded. If we start exposing more countables than we
+ * have counters, we will need to be more clever.
+ */
+ fd_wfi(ctx, ring);
+ OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
+ OUT_RING(ring, CP_ALWAYS_COUNT);
+}
+
+static struct fd_hw_sample *
+time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));
+
+ /* use unused part of vsc_size_mem as scratch space, to avoid
+ * extra allocation:
+ */
+ struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem;
+ const int sample_off = 128;
+ const int addr_off = sample_off + 8;
+
+ debug_assert(ctx->screen->max_freq > 0);
+
+ /* Basic issue is that we need to read counter value to a relative
+ * destination (with per-tile offset) rather than absolute dest
+ * addr. But there is no pm4 packet that can do that. This is
+ * where it would be *really* nice if we could write our own fw
+ * since afaict implementing the sort of packet we need would be
+ * trivial.
+ *
+ * Instead, we:
+ * (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
+ * (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
+ * (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
+ * address to the per-sample offset in the scratch buffer
+ * (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
+ * to CP_ME_NRT_ADDR
+ * (5) CP_MEM_TO_REG's to copy saved counter value from scratch
+ * buffer to CP_ME_NRT_DATA to trigger the write out to query
+ * result buffer
+ *
+ * Straightforward, right?
+ *
+ * Maybe could swap the order of things in the scratch buffer to
+ * put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
+ * shot, but that's really just polishing a turd..
+ */
+
+ fd_wfi(ctx, ring);
+
+ /* copy sample counter _LO and _HI to scratch: */
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
+ CP_REG_TO_MEM_0_64B |
+ CP_REG_TO_MEM_0_CNT(2-1)); /* write 2 regs to mem */
+ OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
+
+ /* ok... here we really *would* like to use the CP_SET_CONSTANT
+ * mode which can add a constant to value in reg2 and write to
+ * reg1... *but* that only works for banked/context registers,
+ * and CP_ME_NRT_DATA isn't one of those.. so we need to do some
+ * CP math to the scratch buffer instead:
+ *
+ * (note first 8 bytes are counter value, use offset 0x8 for
+ * address calculation)
+ */
+
+ /* per-sample offset to scratch bo: */
+ OUT_PKT3(ring, CP_MEM_WRITE, 2);
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+ OUT_RING(ring, samp->offset);
+
+ /* now add to that the per-tile base: */
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
+ CP_REG_TO_MEM_0_ACCUMULATE |
+ CP_REG_TO_MEM_0_CNT(1-1)); /* readback 1 regs */
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+
+ /* now copy that back to CP_ME_NRT_ADDR: */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+
+ /* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
+ * to trigger the write to result buffer
+ */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
+ OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
+
+ /* and again to get the value of the _HI reg from scratch: */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
+ OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
+
+ /* Sigh.. */
+
+ return samp;
+}
+
+static void
+time_elapsed_accumulate_result(struct fd_context *ctx,
+ const void *start, const void *end,
+ union pipe_query_result *result)
+{
+ uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
+ /* max_freq is in Hz, convert cycle count to ns: */
+ result->u64 += n * 1000000000 / ctx->screen->max_freq;
+}
+
static const struct fd_hw_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.active = FD_STAGE_DRAW,
@@ -116,8 +243,17 @@ static const struct fd_hw_sample_provider occlusion_predicate = {
.accumulate_result = occlusion_predicate_accumulate_result,
};
+static const struct fd_hw_sample_provider time_elapsed = {
+ .query_type = PIPE_QUERY_TIME_ELAPSED,
+ .active = FD_STAGE_DRAW,
+ .enable = time_elapsed_enable,
+ .get_sample = time_elapsed_get_sample,
+ .accumulate_result = time_elapsed_accumulate_result,
+};
+
void fd4_query_context_init(struct pipe_context *pctx)
{
fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate);
+ fd_hw_query_register_provider(pctx, &time_elapsed);
}
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index f9c0e6aaa83..ac5343f1a78 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index c6741890c69..09b26a253f0 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -172,6 +173,11 @@ enum adreno_pm4_type3_packets {
CP_UNKNOWN_1A = 26,
CP_UNKNOWN_4E = 78,
CP_WIDE_REG_WRITE = 116,
+ CP_SCRATCH_TO_REG = 77,
+ CP_REG_TO_SCRATCH = 74,
+ CP_WAIT_MEM_WRITES = 18,
+ CP_COND_REG_EXEC = 71,
+ CP_MEM_TO_REG = 66,
IN_IB_PREFETCH_END = 23,
IN_SUBBLK_PREFETCH = 31,
IN_INSTR_PREFETCH = 32,
@@ -503,5 +509,29 @@ static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val)
return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK;
}
+#define REG_CP_REG_TO_MEM_0 0x00000000
+#define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff
+#define CP_REG_TO_MEM_0_REG__SHIFT 0
+static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val)
+{
+ return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK;
+}
+#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000
+#define CP_REG_TO_MEM_0_CNT__SHIFT 19
+static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val)
+{
+ return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK;
+}
+#define CP_REG_TO_MEM_0_64B 0x40000000
+#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000
+
+#define REG_CP_REG_TO_MEM_1 0x00000001
+#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff
+#define CP_REG_TO_MEM_1_DEST__SHIFT 0
+static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val)
+{
+ return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK;
+}
+
#endif /* ADRENO_PM4_XML */
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 9e7130ab915..85ce97c16b7 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -164,6 +164,9 @@ struct fd_context {
*/
struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
+ /* which sample providers were active in the current batch: */
+ uint32_t active_providers;
+
/* tracking for current stage, to know when to start/stop
* any active queries:
*/
diff --git a/src/gallium/drivers/freedreno/freedreno_query.h b/src/gallium/drivers/freedreno/freedreno_query.h
index c2c71da2790..1e4f45ffcd3 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.h
+++ b/src/gallium/drivers/freedreno/freedreno_query.h
@@ -65,4 +65,16 @@ fd_query(struct pipe_query *pq)
void fd_query_screen_init(struct pipe_screen *pscreen);
void fd_query_context_init(struct pipe_context *pctx);
+static inline bool
+skip_begin_query(int type)
+{
+ switch (type) {
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_GPU_FINISHED:
+ return true;
+ default:
+ return false;
+ }
+}
+
#endif /* FREEDRENO_QUERY_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c
index 027fdc9de23..2ac03f22b41 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -47,6 +47,8 @@ static int pidx(unsigned query_type)
return 0;
case PIPE_QUERY_OCCLUSION_PREDICATE:
return 1;
+ case PIPE_QUERY_TIME_ELAPSED:
+ return 2;
default:
return -1;
}
@@ -89,7 +91,9 @@ static void
resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
struct fd_ringbuffer *ring)
{
+ int idx = pidx(hq->provider->query_type);
assert(!hq->period);
+ ctx->active_providers |= (1 << idx);
hq->period = util_slab_alloc(&ctx->sample_period_pool);
list_inithead(&hq->period->list);
hq->period->start = get_sample(ctx, ring, hq->base.type);
@@ -101,7 +105,9 @@ static void
pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
struct fd_ringbuffer *ring)
{
+ int idx = pidx(hq->provider->query_type);
assert(hq->period && !hq->period->end);
+ assert(ctx->active_providers & (1 << idx));
hq->period->end = get_sample(ctx, ring, hq->base.type);
list_addtail(&hq->period->list, &hq->current_periods);
hq->period = NULL;
@@ -156,6 +162,12 @@ static void
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_hw_query *hq = fd_hw_query(q);
+ /* there are a couple special cases, which don't have
+ * a matching ->begin_query():
+ */
+ if (skip_begin_query(q->type) && !q->active) {
+ fd_hw_begin_query(ctx, q);
+ }
if (!q->active)
return;
if (is_active(hq, ctx->stage))
@@ -291,6 +303,8 @@ fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
pipe_reference_init(&samp->reference, 1);
samp->size = size;
+ debug_assert(util_is_power_of_two(size));
+ ctx->next_sample_offset = align(ctx->next_sample_offset, size);
samp->offset = ctx->next_sample_offset;
/* NOTE: util_slab_alloc() does not zero out the buffer: */
samp->bo = NULL;
@@ -318,7 +332,7 @@ prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
assert(samp->tile_stride == tile_stride);
return;
}
- samp->bo = bo;
+ samp->bo = fd_bo_ref(bo);
samp->num_tiles = num_tiles;
samp->tile_stride = tile_stride;
}
@@ -431,6 +445,23 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
ctx->stage = stage;
}
+/* call the provider->enable() for all the hw queries that were active
+ * in the current batch. This sets up perfctr selector regs statically
+ * for the duration of the batch.
+ */
+void
+fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
+ if (ctx->active_providers & (1 << idx)) {
+ assert(ctx->sample_providers[idx]);
+ if (ctx->sample_providers[idx]->enable)
+ ctx->sample_providers[idx]->enable(ctx, ring);
+ }
+ }
+ ctx->active_providers = 0; /* clear it for next frame */
+}
+
void
fd_hw_query_register_provider(struct pipe_context *pctx,
const struct fd_hw_sample_provider *provider)
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h
index 8f4b1f58ee5..8a5d114d806 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.h
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h
@@ -76,6 +76,11 @@ struct fd_hw_sample_provider {
/* stages applicable to the query type: */
enum fd_render_stage active;
+ /* Optional hook for enabling a counter. Guaranteed to happen
+ * at least once before the first ->get_sample() in a batch.
+ */
+ void (*enable)(struct fd_context *ctx, struct fd_ringbuffer *ring);
+
/* when a new sample is required, emit appropriate cmdstream
* and return a sample object:
*/
@@ -144,6 +149,7 @@ void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
struct fd_ringbuffer *ring);
void fd_hw_query_set_stage(struct fd_context *ctx,
struct fd_ringbuffer *ring, enum fd_render_stage stage);
+void fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring);
void fd_hw_query_register_provider(struct pipe_context *pctx,
const struct fd_hw_sample_provider *provider);
void fd_hw_query_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 27f4d267438..2b3ecfe664e 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -298,12 +298,14 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return is_a3xx(screen) ? 1 : 0;
/* Queries. */
- case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
return is_a3xx(screen) || is_a4xx(screen);
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ /* only a4xx, requires new enough kernel so we know max_freq: */
+ return (screen->max_freq > 0) && is_a4xx(screen);
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
@@ -434,9 +436,12 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
}
debug_printf("unknown shader param %d\n", param);
@@ -534,6 +539,16 @@ fd_screen_create(struct fd_device *dev)
}
screen->device_id = val;
+ if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) {
+ DBG("could not get gpu freq");
+ /* this limits what performance related queries are
+ * supported but is not fatal
+ */
+ screen->max_freq = 0;
+ } else {
+ screen->max_freq = val;
+ }
+
if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
DBG("could not get gpu-id");
goto fail;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index 8fb096a10dd..a81c7786390 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -56,6 +56,7 @@ struct fd_screen {
uint32_t device_id;
uint32_t gpu_id; /* 220, 305, etc */
uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */
+ uint32_t max_freq;
uint32_t max_rts; /* max # of render targets */
void *compiler; /* currently unused for a2xx */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index ffa75775505..7a1812f2518 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1365,7 +1365,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
- struct ir3_instruction *const_off[4];
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
unsigned i, coords, flags;
unsigned nsrc0 = 0, nsrc1 = 0;
diff --git a/src/gallium/drivers/ilo/ilo_gpgpu.c b/src/gallium/drivers/ilo/ilo_gpgpu.c
index b7415901a88..ab165b6d43b 100644
--- a/src/gallium/drivers/ilo/ilo_gpgpu.c
+++ b/src/gallium/drivers/ilo/ilo_gpgpu.c
@@ -79,9 +79,7 @@ launch_grid(struct ilo_context *ilo,
}
static void
-ilo_launch_grid(struct pipe_context *pipe,
- const uint *block_layout, const uint *grid_layout,
- uint32_t pc, const void *input)
+ilo_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct ilo_context *ilo = ilo_context(pipe);
struct ilo_shader_state *cs = ilo->state_vector.cs;
@@ -92,13 +90,13 @@ ilo_launch_grid(struct pipe_context *pipe,
input_buf.buffer_size =
ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE);
if (input_buf.buffer_size) {
- u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input,
+ u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, info->input,
&input_buf.buffer_offset, &input_buf.buffer);
}
ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder);
- launch_grid(ilo, block_layout, grid_layout, &input_buf, pc);
+ launch_grid(ilo, info->block, info->grid, &input_buf, info->pc);
ilo_render_invalidate_hw(ilo->render);
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 44d7c11af43..ef9da6b8315 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -136,6 +136,8 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
return ILO_MAX_SAMPLER_VIEWS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 8dc2d38e039..f8d2637cc6f 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -1851,7 +1851,7 @@ ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader,
static void
ilo_set_shader_images(struct pipe_context *pipe, unsigned shader,
unsigned start, unsigned count,
- struct pipe_image_view **views)
+ struct pipe_image_view *views)
{
#if 0
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index d22e50777fa..9e56c962d2d 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -910,7 +910,9 @@ lp_rast_create( unsigned num_threads )
create_rast_threads(rast);
/* for synchronizing rasterization threads */
- pipe_barrier_init( &rast->barrier, rast->num_threads );
+ if (rast->num_threads > 0) {
+ pipe_barrier_init( &rast->barrier, rast->num_threads );
+ }
memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
@@ -967,7 +969,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
}
/* for synchronizing rasterization threads */
- pipe_barrier_destroy( &rast->barrier );
+ if (rast->num_threads > 0) {
+ pipe_barrier_destroy( &rast->barrier );
+ }
lp_scene_queue_destroy(rast->full_scenes);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 5ab297d7e1a..97146912704 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -169,8 +169,8 @@ struct lp_setup_context
};
static inline void
-scissor_planes_needed(boolean scis_planes[4], struct u_rect *bbox,
- struct u_rect *scissor)
+scissor_planes_needed(boolean scis_planes[4], const struct u_rect *bbox,
+ const struct u_rect *scissor)
{
/* left */
scis_planes[0] = (bbox->x0 < scissor->x0);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index af4e7900d3c..018130c3192 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -719,7 +719,7 @@ try_setup_line( struct lp_setup_context *setup,
*/
if (nr_planes > 4) {
/* why not just use draw_regions */
- struct u_rect *scissor = &setup->scissors[viewport_index];
+ const struct u_rect *scissor = &setup->scissors[viewport_index];
struct lp_rast_plane *plane_s = &plane[4];
boolean s_planes[4];
scissor_planes_needed(s_planes, &bbox, scissor);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index cdb3d015dec..29aee726941 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -681,7 +681,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
*/
if (nr_planes > 3) {
/* why not just use draw_regions */
- struct u_rect *scissor = &setup->scissors[viewport_index];
+ const struct u_rect *scissor = &setup->scissors[viewport_index];
struct lp_rast_plane *plane_s = &plane[3];
boolean s_planes[4];
scissor_planes_needed(s_planes, &bbox, scissor);
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 31a93659647..43ffce63a25 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -60,6 +60,8 @@ NV30_C_SOURCES := \
nv30/nvfx_vertprog.c
NV50_C_SOURCES := \
+ nv50/g80_defs.xml.h \
+ nv50/g80_texture.xml.h \
nv50/nv50_2d.xml.h \
nv50/nv50_3ddefs.xml.h \
nv50/nv50_3d.xml.h \
@@ -68,7 +70,6 @@ NV50_C_SOURCES := \
nv50/nv50_compute.xml.h \
nv50/nv50_context.c \
nv50/nv50_context.h \
- nv50/nv50_defs.xml.h \
nv50/nv50_formats.c \
nv50/nv50_miptree.c \
nv50/nv50_program.c \
@@ -93,7 +94,6 @@ NV50_C_SOURCES := \
nv50/nv50_state_validate.c \
nv50/nv50_surface.c \
nv50/nv50_tex.c \
- nv50/nv50_texture.xml.h \
nv50/nv50_transfer.c \
nv50/nv50_transfer.h \
nv50/nv50_vbo.c \
@@ -147,6 +147,7 @@ NVC0_CODEGEN_SOURCES := \
codegen/nv50_ir_target_nvc0.h
NVC0_C_SOURCES := \
+ nvc0/gm107_texture.xml.h \
nvc0/nvc0_3d.xml.h \
nvc0/nvc0_compute.c \
nvc0/nvc0_compute.h \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 9d7becf27d4..97ebed455b6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -232,6 +232,8 @@ enum operation
#define NV50_IR_SUBOP_SHFL_UP 1
#define NV50_IR_SUBOP_SHFL_DOWN 2
#define NV50_IR_SUBOP_SHFL_BFLY 3
+#define NV50_IR_SUBOP_LOAD_LOCKED 1
+#define NV50_IR_SUBOP_STORE_UNLOCKED 2
#define NV50_IR_SUBOP_MADSP_SD 0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 0c7cd1d8137..a78b3f954a4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -433,6 +433,10 @@ CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
break;
default:
+ if (i->op == OP_SELP) {
+ assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
+ srcId(i->src(s), 42);
+ }
// ignore here, can be predicate or flags, but must not be address
break;
}
@@ -1045,7 +1049,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i)
{
emitForm_21(i, 0x250, 0x050);
- if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT)))
+ if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 13;
}
@@ -1239,7 +1243,7 @@ CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask
defId(i->def(0), 2);
srcId(i->src(0), 10);
- srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23);
+ srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
code[1] |= 1 << 9; // dall
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index dee26225b7e..93c40d15e46 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -193,6 +193,8 @@ private:
void emitNOP();
void emitKIL();
void emitOUT();
+
+ void emitMEMBAR();
};
/*******************************************************************************
@@ -248,6 +250,8 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val)
case SV_INVOCATION_ID : id = 0x11; break;
case SV_THREAD_KILL : id = 0x13; break;
case SV_INVOCATION_INFO: id = 0x1d; break;
+ case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
+ case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
default:
assert(!"invalid system value");
id = 0;
@@ -1531,7 +1535,10 @@ CodeEmitterGM107::emitFSWZADD()
emitRND (0x27);
emitField(0x26, 1, insn->lanes); /* abused for .ndv */
emitField(0x1c, 8, insn->subOp);
- emitGPR (0x14, insn->src(1));
+ if (insn->predSrc != 1)
+ emitGPR (0x14, insn->src(1));
+ else
+ emitGPR (0x14);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
@@ -2327,22 +2334,34 @@ void
CodeEmitterGM107::emitATOM()
{
unsigned dType, subOp;
- switch (insn->dType) {
- case TYPE_U32: dType = 0; break;
- case TYPE_S32: dType = 1; break;
- case TYPE_U64: dType = 2; break;
- case TYPE_F32: dType = 3; break;
- case TYPE_B128: dType = 4; break;
- case TYPE_S64: dType = 5; break;
- default: assert(!"unexpected dType"); dType = 0; break;
- }
- if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
- subOp = 8;
- else
- subOp = insn->subOp;
- assert(insn->subOp != NV50_IR_SUBOP_ATOM_CAS); /* XXX */
- emitInsn (0xed000000);
+ if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ switch (insn->dType) {
+ case TYPE_U32: dType = 0; break;
+ case TYPE_U64: dType = 1; break;
+ default: assert(!"unexpected dType"); dType = 0; break;
+ }
+ subOp = 15;
+
+ emitInsn (0xee000000);
+ } else {
+ switch (insn->dType) {
+ case TYPE_U32: dType = 0; break;
+ case TYPE_S32: dType = 1; break;
+ case TYPE_U64: dType = 2; break;
+ case TYPE_F32: dType = 3; break;
+ case TYPE_B128: dType = 4; break;
+ case TYPE_S64: dType = 5; break;
+ default: assert(!"unexpected dType"); dType = 0; break;
+ }
+ if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
+ subOp = 8;
+ else
+ subOp = insn->subOp;
+
+ emitInsn (0xed000000);
+ }
+
emitField(0x34, 4, subOp);
emitField(0x31, 3, dType);
emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
@@ -2627,6 +2646,13 @@ CodeEmitterGM107::emitOUT()
emitGPR (0x00, insn->def(0));
}
+void
+CodeEmitterGM107::emitMEMBAR()
+{
+ emitInsn (0xef980000);
+ emitField(0x08, 2, insn->subOp >> 2);
+}
+
/*******************************************************************************
* assembler front-end
******************************************************************************/
@@ -2926,6 +2952,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_RESTART:
emitOUT();
break;
+ case OP_MEMBAR:
+ emitMEMBAR();
+ break;
default:
assert(!"invalid opcode");
emitNOP();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index bc8354deba1..682a19d6d78 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -527,7 +527,8 @@ CodeEmitterNV50::emitForm_ADD(const Instruction *i)
setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
setSrc(i, 0, 0);
- setSrc(i, 1, 2);
+ if (i->predSrc != 1)
+ setSrc(i, 1, 2);
if (i->getIndirect(0, 0)) {
assert(!i->getIndirect(1, 0));
@@ -840,7 +841,7 @@ CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
emitForm_ADD(i);
- if (!i->srcExists(1))
+ if (!i->srcExists(1) || i->predSrc == 1)
srcId(i->src(0), 32 + 14);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 8637db91521..0068da5cbb7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -398,6 +398,11 @@ CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
break;
default:
+ if (i->op == OP_SELP) {
+ // OP_SELP is used to implement shared+atomics on Fermi.
+ assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
+ srcId(i->src(s), 49);
+ }
// ignore here, can be predicate or flags, but must not be address
break;
}
@@ -1174,7 +1179,7 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i)
{
emitForm_A(i, HEX64(20000000, 00000004));
- if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
+ if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 20;
}
@@ -1334,7 +1339,7 @@ CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
defId(i->def(0), 14);
srcId(i->src(0), 20);
- srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
+ srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
code[0] |= 1 << 9; // dall
@@ -1773,7 +1778,16 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i)
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
- case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
+ case FILE_MEMORY_SHARED:
+ if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+ opc = 0xb8000000;
+ else
+ opc = 0xcc000000;
+ } else {
+ opc = 0xc9000000;
+ }
+ break;
default:
assert(!"invalid memory file");
opc = 0;
@@ -1782,6 +1796,15 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i)
code[0] = 0x00000005;
code[1] = opc;
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
+ // Unlocked store on shared memory can fail.
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
+ i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
+ assert(i->defExists(0));
+ defId(i->def(0), 8);
+ }
+ }
+
setAddressByFile(i->src(0));
srcId(i->src(1), 14);
srcId(i->src(0).getIndirect(0), 20);
@@ -1804,7 +1827,16 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
- case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
+ case FILE_MEMORY_SHARED:
+ if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+ opc = 0xa8000000;
+ else
+ opc = 0xc4000000;
+ } else {
+ opc = 0xc1000000;
+ }
+ break;
case FILE_MEMORY_CONST:
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
emitMOV(i); // not sure if this is any better
@@ -1820,6 +1852,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
}
code[1] = opc;
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
+ if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
+ assert(i->defExists(1));
+ defId(i->def(1), 32 + 18);
+ }
+ }
+
defId(i->def(0), 14);
setAddressByFile(i->src(0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 52ac198221d..d06e9efa463 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -374,6 +374,7 @@ static nv50_ir::DataFile translateFile(uint file)
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
+ case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
@@ -858,6 +859,11 @@ public:
};
std::vector<Resource> resources;
+ struct MemoryFile {
+ bool shared;
+ };
+ std::vector<MemoryFile> memoryFiles;
+
private:
int inferSysValDirection(unsigned sn) const;
bool scanDeclaration(const struct tgsi_full_declaration *);
@@ -904,6 +910,7 @@ bool Source::scanSource()
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
+ memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
info->immd.bufSize = 0;
@@ -1213,6 +1220,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
for (i = first; i <= last; ++i)
textureViews[i].target = decl->SamplerView.Resource;
break;
+ case TGSI_FILE_MEMORY:
+ for (i = first; i <= last; ++i)
+ memoryFiles[i].shared = decl->Declaration.Shared;
+ break;
+ case TGSI_FILE_NULL:
case TGSI_FILE_TEMPORARY:
for (i = first; i <= last; ++i)
tempArrayId[i] = arrayId;
@@ -1220,7 +1232,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
first, last - first + 1)));
break;
- case TGSI_FILE_NULL:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
@@ -1516,6 +1527,9 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
sym->reg.fileIndex = fileIdx;
+ if (tgsiFile == TGSI_FILE_MEMORY && code->memoryFiles[fileIdx].shared)
+ sym->setFile(FILE_MEMORY_SHARED);
+
if (idx >= 0) {
if (sym->reg.file == FILE_SHADER_INPUT)
sym->setOffset(info->in[idx].slot[c] * 4);
@@ -1769,7 +1783,7 @@ Converter::acquireDst(int d, int c)
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
- if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
+ if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY)
return NULL;
if (dst.isIndirect(0) ||
@@ -2239,7 +2253,8 @@ Converter::handleLOAD(Value *dst0[4])
int c;
std::vector<Value *> off, src, ldv, def;
- if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
+ tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
for (c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
@@ -2248,9 +2263,10 @@ Converter::handleLOAD(Value *dst0[4])
Symbol *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
+ tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
} else {
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 4 * c);
}
Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
@@ -2337,7 +2353,8 @@ Converter::handleSTORE()
int c;
std::vector<Value *> off, src, dummy;
- if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER ||
+ tgsi.getDst(0).getFile() == TGSI_FILE_MEMORY) {
for (c = 0; c < 4; ++c) {
if (!(tgsi.getDst(0).getMask() & (1 << c)))
continue;
@@ -2346,11 +2363,11 @@ Converter::handleSTORE()
Value *off;
if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
off = NULL;
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
+ sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
} else {
off = fetchSrc(0, 0);
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
}
Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
@@ -2422,7 +2439,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
std::vector<Value *> defv;
LValue *dst = getScratch();
- if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
+ tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
for (int c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
@@ -2431,9 +2449,10 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
Value *off = fetchSrc(1, c), *off2 = NULL;
Value *sym;
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
+ tgsi.getSrc(1).getValueU32(c, info));
else
- sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
+ sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
if (tgsi.getSrc(0).isIndirect(0))
off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index e7cb54bc426..d181f1574f1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1033,6 +1033,100 @@ NVC0LoweringPass::handleSUQ(Instruction *suq)
return true;
}
+void
+NVC0LoweringPass::handleSharedATOM(Instruction *atom)
+{
+ assert(atom->src(0).getFile() == FILE_MEMORY_SHARED);
+
+ BasicBlock *currBB = atom->bb;
+ BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false);
+ BasicBlock *joinBB = atom->bb->splitAfter(atom);
+
+ bld.setPosition(currBB, true);
+ assert(!currBB->joinAt);
+ currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
+
+ bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL);
+ currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE);
+
+ bld.setPosition(tryLockAndSetBB, true);
+
+ Instruction *ld =
+ bld.mkLoad(TYPE_U32, atom->getDef(0),
+ bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL);
+ ld->setDef(1, bld.getSSA(1, FILE_PREDICATE));
+ ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED;
+
+ Value *stVal;
+ if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
+ // Read the old value, and write the new one.
+ stVal = atom->getSrc(1);
+ } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ CmpInstruction *set =
+ bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+ TYPE_U32, ld->getDef(0), atom->getSrc(1));
+ set->setPredicate(CC_P, ld->getDef(1));
+
+ Instruction *selp =
+ bld.mkOp3(OP_SELP, TYPE_U32, bld.getSSA(), ld->getDef(0),
+ atom->getSrc(2), set->getDef(0));
+ selp->src(2).mod = Modifier(NV50_IR_MOD_NOT);
+ selp->setPredicate(CC_P, ld->getDef(1));
+
+ stVal = selp->getDef(0);
+ } else {
+ operation op;
+
+ switch (atom->subOp) {
+ case NV50_IR_SUBOP_ATOM_ADD:
+ op = OP_ADD;
+ break;
+ case NV50_IR_SUBOP_ATOM_AND:
+ op = OP_AND;
+ break;
+ case NV50_IR_SUBOP_ATOM_OR:
+ op = OP_OR;
+ break;
+ case NV50_IR_SUBOP_ATOM_XOR:
+ op = OP_XOR;
+ break;
+ case NV50_IR_SUBOP_ATOM_MIN:
+ op = OP_MIN;
+ break;
+ case NV50_IR_SUBOP_ATOM_MAX:
+ op = OP_MAX;
+ break;
+ default:
+ assert(0);
+ }
+
+ Instruction *i =
+ bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0),
+ atom->getSrc(1));
+ i->setPredicate(CC_P, ld->getDef(1));
+
+ stVal = i->getDef(0);
+ }
+
+ Instruction *st =
+ bld.mkStore(OP_STORE, TYPE_U32,
+ bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0),
+ NULL, stVal);
+ st->setPredicate(CC_P, ld->getDef(1));
+ st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED;
+
+ // Loop until the lock is acquired.
+ bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1));
+ tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK);
+ tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS);
+ bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL);
+
+ bld.remove(atom);
+
+ bld.setPosition(joinBB, false);
+ bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
+}
+
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
@@ -1044,8 +1138,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
sv = SV_LBASE;
break;
case FILE_MEMORY_SHARED:
- sv = SV_SBASE;
- break;
+ handleSharedATOM(atom);
+ return true;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
@@ -1072,6 +1166,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
bool
NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
{
+ if (cas->src(0).getFile() == FILE_MEMORY_SHARED) {
+ // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM().
+ return false;
+ }
+
if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS &&
cas->subOp != NV50_IR_SUBOP_ATOM_EXCH)
return false;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 09ec7e69ddc..6eb8aff3036 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -105,6 +105,7 @@ protected:
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
+ void handleSharedATOM(Instruction *);
void checkPredicate(Instruction *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 05b8db4a3d8..6192c0665e4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1539,6 +1539,7 @@ private:
void handleCVT_CVT(Instruction *);
void handleCVT_EXTBF(Instruction *);
void handleSUCLAMP(Instruction *);
+ void handleNEG(Instruction *);
BuildUtil bld;
};
@@ -1634,6 +1635,9 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb)
return false;
+ if (src->getInsn()->saturate)
+ return false;
+
if (src->getInsn()->postFactor)
return false;
if (toOp == OP_SAD) {
@@ -2011,6 +2015,34 @@ AlgebraicOpt::handleSUCLAMP(Instruction *insn)
insn->setSrc(0, add->getSrc(s));
}
+// NEG(AND(SET, 1)) -> SET
+void
+AlgebraicOpt::handleNEG(Instruction *i) {
+ Instruction *src = i->getSrc(0)->getInsn();
+ ImmediateValue imm;
+ int b;
+
+ if (isFloatType(i->sType) || !src || src->op != OP_AND)
+ return;
+
+ if (src->src(0).getImmediate(imm))
+ b = 1;
+ else if (src->src(1).getImmediate(imm))
+ b = 0;
+ else
+ return;
+
+ if (!imm.isInteger(1))
+ return;
+
+ Instruction *set = src->getSrc(b)->getInsn();
+ if ((set->op == OP_SET || set->op == OP_SET_AND ||
+ set->op == OP_SET_OR || set->op == OP_SET_XOR) &&
+ !isFloatType(set->dType)) {
+ i->def(0).replace(set->getDef(0), false);
+ }
+}
+
bool
AlgebraicOpt::visit(BasicBlock *bb)
{
@@ -2048,6 +2080,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
case OP_SUCLAMP:
handleSUCLAMP(i);
break;
+ case OP_NEG:
+ handleNEG(i);
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 47285a25c33..85f77047c5c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -198,6 +198,11 @@ static const char *atomSubOpStr[] =
"add", "min", "max", "inc", "dec", "and", "or", "xor", "cas", "exch"
};
+static const char *ldstSubOpStr[] =
+{
+ "", "lock", "unlock"
+};
+
static const char *DataTypeStr[] =
{
"-",
@@ -537,6 +542,11 @@ void Instruction::print() const
if (subOp < Elements(atomSubOpStr))
PRINT("%s ", atomSubOpStr[subOp]);
break;
+ case OP_LOAD:
+ case OP_STORE:
+ if (subOp < Elements(ldstSubOpStr))
+ PRINT("%s ", ldstSubOpStr[subOp]);
+ break;
default:
if (subOp)
PRINT("(SUBOP:%u) ", subOp);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index de39be872e4..d877c253a17 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -968,6 +968,7 @@ GCRA::coalesce(ArrayList& insns)
case 0xf0:
case 0x100:
case 0x110:
+ case 0x120:
ret = doCoalesce(insns, JOIN_MASK_UNION);
break;
default:
@@ -2231,6 +2232,7 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
texConstraintNVE0(tex);
break;
case 0x110:
+ case 0x120:
texConstraintGM107(tex);
break;
default:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index ae0a8bb61d1..89d3a08937f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -143,6 +143,7 @@ Target *Target::create(unsigned int chipset)
STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1);
switch (chipset & ~0xf) {
case 0x110:
+ case 0x120:
return getTargetGM107(chipset);
case 0xc0:
case 0xd0:
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index b62889119c5..5be7a3dab76 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -280,6 +280,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -324,6 +325,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
new file mode 100644
index 00000000000..5d40624bb9e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
@@ -0,0 +1,279 @@
+#ifndef G80_DEFS_XML
+#define G80_DEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/g80_texture.xml ( 18837 bytes, from 2016-01-14 23:54:22)
+- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40)
+- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-02 23:45:00)
+- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42)
+- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 5388 bytes, from 2016-01-14 23:54:22)
+
+Copyright (C) 2006-2016 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- Ilia Mirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin KoÅ›cielnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define G80_VSTATUS_IDLE 0x00000000
+#define G80_VSTATUS_BUSY 0x00000001
+#define G80_VSTATUS_UNK2 0x00000002
+#define G80_VSTATUS_WAITING 0x00000003
+#define G80_VSTATUS_BLOCKED 0x00000005
+#define G80_VSTATUS_FAULTED 0x00000006
+#define G80_VSTATUS_PAUSED 0x00000007
+#define G80_TIC_SOURCE_ZERO 0x00000000
+#define G80_TIC_SOURCE_R 0x00000002
+#define G80_TIC_SOURCE_G 0x00000003
+#define G80_TIC_SOURCE_B 0x00000004
+#define G80_TIC_SOURCE_A 0x00000005
+#define G80_TIC_SOURCE_ONE_INT 0x00000006
+#define G80_TIC_SOURCE_ONE_FLOAT 0x00000007
+#define G80_TIC_TYPE_SNORM 0x00000001
+#define G80_TIC_TYPE_UNORM 0x00000002
+#define G80_TIC_TYPE_SINT 0x00000003
+#define G80_TIC_TYPE_UINT 0x00000004
+#define G80_TIC_TYPE_SNORM_FORCE_FP16 0x00000005
+#define G80_TIC_TYPE_UNORM_FORCE_FP16 0x00000006
+#define G80_TIC_TYPE_FLOAT 0x00000007
+#define G80_SURFACE_FORMAT_BITMAP 0x0000001c
+#define G80_SURFACE_FORMAT_UNK1D 0x0000001d
+#define G80_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0
+#define G80_SURFACE_FORMAT_RGBA32_SINT 0x000000c1
+#define G80_SURFACE_FORMAT_RGBA32_UINT 0x000000c2
+#define G80_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3
+#define G80_SURFACE_FORMAT_RGBX32_SINT 0x000000c4
+#define G80_SURFACE_FORMAT_RGBX32_UINT 0x000000c5
+#define G80_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6
+#define G80_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7
+#define G80_SURFACE_FORMAT_RGBA16_SINT 0x000000c8
+#define G80_SURFACE_FORMAT_RGBA16_UINT 0x000000c9
+#define G80_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca
+#define G80_SURFACE_FORMAT_RG32_FLOAT 0x000000cb
+#define G80_SURFACE_FORMAT_RG32_SINT 0x000000cc
+#define G80_SURFACE_FORMAT_RG32_UINT 0x000000cd
+#define G80_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce
+#define G80_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf
+#define G80_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0
+#define G80_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1
+#define G80_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2
+#define G80_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5
+#define G80_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6
+#define G80_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7
+#define G80_SURFACE_FORMAT_RGBA8_SINT 0x000000d8
+#define G80_SURFACE_FORMAT_RGBA8_UINT 0x000000d9
+#define G80_SURFACE_FORMAT_RG16_UNORM 0x000000da
+#define G80_SURFACE_FORMAT_RG16_SNORM 0x000000db
+#define G80_SURFACE_FORMAT_RG16_SINT 0x000000dc
+#define G80_SURFACE_FORMAT_RG16_UINT 0x000000dd
+#define G80_SURFACE_FORMAT_RG16_FLOAT 0x000000de
+#define G80_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df
+#define G80_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0
+#define G80_SURFACE_FORMAT_R32_SINT 0x000000e3
+#define G80_SURFACE_FORMAT_R32_UINT 0x000000e4
+#define G80_SURFACE_FORMAT_R32_FLOAT 0x000000e5
+#define G80_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6
+#define G80_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7
+#define G80_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8
+#define G80_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9
+#define G80_SURFACE_FORMAT_RG8_UNORM 0x000000ea
+#define G80_SURFACE_FORMAT_RG8_SNORM 0x000000eb
+#define G80_SURFACE_FORMAT_RG8_SINT 0x000000ec
+#define G80_SURFACE_FORMAT_RG8_UINT 0x000000ed
+#define G80_SURFACE_FORMAT_R16_UNORM 0x000000ee
+#define G80_SURFACE_FORMAT_R16_SNORM 0x000000ef
+#define G80_SURFACE_FORMAT_R16_SINT 0x000000f0
+#define G80_SURFACE_FORMAT_R16_UINT 0x000000f1
+#define G80_SURFACE_FORMAT_R16_FLOAT 0x000000f2
+#define G80_SURFACE_FORMAT_R8_UNORM 0x000000f3
+#define G80_SURFACE_FORMAT_R8_SNORM 0x000000f4
+#define G80_SURFACE_FORMAT_R8_SINT 0x000000f5
+#define G80_SURFACE_FORMAT_R8_UINT 0x000000f6
+#define G80_SURFACE_FORMAT_A8_UNORM 0x000000f7
+#define G80_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8
+#define G80_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9
+#define G80_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa
+#define G80_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb
+#define G80_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc
+#define G80_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd
+#define G80_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe
+#define G80_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff
+#define G80_ZETA_FORMAT_Z32_FLOAT 0x0000000a
+#define G80_ZETA_FORMAT_Z16_UNORM 0x00000013
+#define G80_ZETA_FORMAT_S8_Z24_UNORM 0x00000014
+#define G80_ZETA_FORMAT_Z24_X8_UNORM 0x00000015
+#define G80_ZETA_FORMAT_Z24_S8_UNORM 0x00000016
+#define G80_ZETA_FORMAT_Z24_C8_UNORM 0x00000018
+#define G80_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019
+#define G80_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d
+#define G80_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e
+#define G80_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f
+#define GK104_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002
+#define GK104_IMAGE_FORMAT_RGBA32_SINT 0x00000003
+#define GK104_IMAGE_FORMAT_RGBA32_UINT 0x00000004
+#define GK104_IMAGE_FORMAT_RGBA16_UNORM 0x00000008
+#define GK104_IMAGE_FORMAT_RGBA16_SNORM 0x00000009
+#define GK104_IMAGE_FORMAT_RGBA16_SINT 0x0000000a
+#define GK104_IMAGE_FORMAT_RGBA16_UINT 0x0000000b
+#define GK104_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c
+#define GK104_IMAGE_FORMAT_RG32_FLOAT 0x0000000d
+#define GK104_IMAGE_FORMAT_RG32_SINT 0x0000000e
+#define GK104_IMAGE_FORMAT_RG32_UINT 0x0000000f
+#define GK104_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013
+#define GK104_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015
+#define GK104_IMAGE_FORMAT_RGBA8_UNORM 0x00000018
+#define GK104_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a
+#define GK104_IMAGE_FORMAT_RGBA8_SINT 0x0000001b
+#define GK104_IMAGE_FORMAT_RGBA8_UINT 0x0000001c
+#define GK104_IMAGE_FORMAT_RG16_UNORM 0x0000001d
+#define GK104_IMAGE_FORMAT_RG16_SNORM 0x0000001e
+#define GK104_IMAGE_FORMAT_RG16_SINT 0x0000001f
+#define GK104_IMAGE_FORMAT_RG16_UINT 0x00000020
+#define GK104_IMAGE_FORMAT_RG16_FLOAT 0x00000021
+#define GK104_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024
+#define GK104_IMAGE_FORMAT_R32_SINT 0x00000027
+#define GK104_IMAGE_FORMAT_R32_UINT 0x00000028
+#define GK104_IMAGE_FORMAT_R32_FLOAT 0x00000029
+#define GK104_IMAGE_FORMAT_RG8_UNORM 0x0000002e
+#define GK104_IMAGE_FORMAT_RG8_SNORM 0x0000002f
+#define GK104_IMAGE_FORMAT_RG8_SINT 0x00000030
+#define GK104_IMAGE_FORMAT_RG8_UINT 0x00000031
+#define GK104_IMAGE_FORMAT_R16_UNORM 0x00000032
+#define GK104_IMAGE_FORMAT_R16_SNORM 0x00000033
+#define GK104_IMAGE_FORMAT_R16_SINT 0x00000034
+#define GK104_IMAGE_FORMAT_R16_UINT 0x00000035
+#define GK104_IMAGE_FORMAT_R16_FLOAT 0x00000036
+#define GK104_IMAGE_FORMAT_R8_UNORM 0x00000037
+#define GK104_IMAGE_FORMAT_R8_SNORM 0x00000038
+#define GK104_IMAGE_FORMAT_R8_SINT 0x00000039
+#define GK104_IMAGE_FORMAT_R8_UINT 0x0000003a
+#define G80_PGRAPH_DATA_ERROR_INVALID_OPERATION 0x00000003
+#define G80_PGRAPH_DATA_ERROR_INVALID_VALUE 0x00000004
+#define G80_PGRAPH_DATA_ERROR_INVALID_ENUM 0x00000005
+#define G80_PGRAPH_DATA_ERROR_INVALID_OBJECT 0x00000008
+#define G80_PGRAPH_DATA_ERROR_READ_ONLY_OBJECT 0x00000009
+#define G80_PGRAPH_DATA_ERROR_SUPERVISOR_OBJECT 0x0000000a
+#define G80_PGRAPH_DATA_ERROR_INVALID_ADDRESS_ALIGNMENT 0x0000000b
+#define G80_PGRAPH_DATA_ERROR_INVALID_BITFIELD 0x0000000c
+#define G80_PGRAPH_DATA_ERROR_BEGIN_END_ACTIVE 0x0000000d
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_BACK_OVER_LIMIT 0x0000000e
+#define G80_PGRAPH_DATA_ERROR_VIEWPORT_ID_NEEDS_GP 0x0000000f
+#define G80_PGRAPH_DATA_ERROR_RT_DOUBLE_BIND 0x00000010
+#define G80_PGRAPH_DATA_ERROR_RT_TYPES_MISMATCH 0x00000011
+#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_ZETA 0x00000012
+#define G80_PGRAPH_DATA_ERROR_FP_TOO_FEW_REGS 0x00000015
+#define G80_PGRAPH_DATA_ERROR_ZETA_FORMAT_CSAA_MISMATCH 0x00000016
+#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_MSAA 0x00000017
+#define G80_PGRAPH_DATA_ERROR_FP_INTERPOLANT_START_OVER_LIMIT 0x00000018
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_LAYER_OVER_LIMIT 0x00000019
+#define G80_PGRAPH_DATA_ERROR_RT_INVALID_ALIGNMENT 0x0000001a
+#define G80_PGRAPH_DATA_ERROR_SAMPLER_OVER_LIMIT 0x0000001b
+#define G80_PGRAPH_DATA_ERROR_TEXTURE_OVER_LIMIT 0x0000001c
+#define G80_PGRAPH_DATA_ERROR_GP_TOO_MANY_OUTPUTS 0x0000001e
+#define G80_PGRAPH_DATA_ERROR_RT_BPP128_WITH_MS8 0x0000001f
+#define G80_PGRAPH_DATA_ERROR_Z_OUT_OF_BOUNDS 0x00000021
+#define G80_PGRAPH_DATA_ERROR_XY_OUT_OF_BOUNDS 0x00000023
+#define G80_PGRAPH_DATA_ERROR_VP_ZERO_INPUTS 0x00000024
+#define G80_PGRAPH_DATA_ERROR_CP_MORE_PARAMS_THAN_SHARED 0x00000027
+#define G80_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_STRIPED 0x00000028
+#define G80_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_PACKED 0x00000029
+#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_WARPS 0x0000002a
+#define G80_PGRAPH_DATA_ERROR_CP_BLOCK_SIZE_MISMATCH 0x0000002b
+#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_LOCAL_WARPS 0x0000002c
+#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_STACK_WARPS 0x0000002d
+#define G80_PGRAPH_DATA_ERROR_CP_NO_BLOCKDIM_LATCH 0x0000002e
+#define G80_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH 0x00000031
+#define G80_PGRAPH_DATA_ERROR_ENG2D_OPERATION_ILLEGAL_FOR_DST_FORMAT 0x00000033
+#define G80_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH_B 0x00000034
+#define G80_PGRAPH_DATA_ERROR_PRIMITIVE_ID_NEEDS_GP 0x0000003f
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_VIEWPORT_OVER_LIMIT 0x00000044
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_FRONT_OVER_LIMIT 0x00000045
+#define G80_PGRAPH_DATA_ERROR_LAYER_ID_NEEDS_GP 0x00000046
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_CLIP_OVER_LIMIT 0x00000047
+#define G80_PGRAPH_DATA_ERROR_SEMANTIC_PTSZ_OVER_LIMIT 0x00000048
+#define G80_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_IN 0x00000051
+#define G80_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_OUT 0x00000053
+#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_ZETA_GF100 0x00000098
+#define G80_PGRAPH_DATA_ERROR_ENG2D_UNALIGNED_PITCH_GF100 0x000000a5
+#define G80_CG_IDLE_TIMEOUT__MASK 0x0000003f
+#define G80_CG_IDLE_TIMEOUT__SHIFT 0
+#define G80_CG_IDLE_TIMEOUT_ENABLE 0x00000040
+#define G80_CG_INTERFACE_REENABLE_TIME__MASK 0x000f0000
+#define G80_CG_INTERFACE_REENABLE_TIME__SHIFT 16
+#define G80_CG_THROTTLE_DUTY_M1__MASK 0x00f00000
+#define G80_CG_THROTTLE_DUTY_M1__SHIFT 20
+#define G80_CG_DELAY__MASK 0x0f000000
+#define G80_CG_DELAY__SHIFT 24
+#define G80_CG_CLOCK_THROTTLE_ENABLE 0x10000000
+#define G80_CG_THROTTLE_MODE__MASK 0x20000000
+#define G80_CG_THROTTLE_MODE__SHIFT 29
+#define G80_CG_THROTTLE_MODE_AUTO 0x00000000
+#define G80_CG_THROTTLE_MODE_MANUAL 0x20000000
+#define G80_CG_INTERFACE_THROTTLE_ENABLE 0x40000000
+#define G80_QUERY__SIZE 0x00000010
+#define G80_QUERY_COUNTER 0x00000000
+
+#define G80_QUERY_RES 0x00000004
+
+#define G80_QUERY_TIME 0x00000008
+
+
+#endif /* G80_DEFS_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h b/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h
new file mode 100644
index 00000000000..542963ca452
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h
@@ -0,0 +1,451 @@
+#ifndef G80_TEXTURE_XML
+#define G80_TEXTURE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/g80_texture.xml ( 18837 bytes, from 2016-01-14 23:54:22)
+- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40)
+- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-02 23:45:00)
+- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42)
+- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 5388 bytes, from 2016-01-14 23:54:22)
+
+Copyright (C) 2006-2016 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- Ilia Mirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin KoÅ›cielnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define G80_TSC_WRAP_WRAP 0x00000000
+#define G80_TSC_WRAP_MIRROR 0x00000001
+#define G80_TSC_WRAP_CLAMP_TO_EDGE 0x00000002
+#define G80_TSC_WRAP_BORDER 0x00000003
+#define G80_TSC_WRAP_CLAMP_OGL 0x00000004
+#define G80_TSC_WRAP_MIRROR_ONCE_CLAMP_TO_EDGE 0x00000005
+#define G80_TSC_WRAP_MIRROR_ONCE_BORDER 0x00000006
+#define G80_TSC_WRAP_MIRROR_ONCE_CLAMP_OGL 0x00000007
+#define G80_TIC__SIZE 0x00000020
+#define G80_TIC_0 0x00000000
+#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__MASK 0x80000000
+#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT 31
+#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED_NO 0x00000000
+#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED_YES 0x80000000
+#define G84_TIC_0_PACK_COMPONENTS 0x40000000
+#define G80_TIC_0_W_SOURCE__MASK 0x38000000
+#define G80_TIC_0_W_SOURCE__SHIFT 27
+#define G80_TIC_0_Z_SOURCE__MASK 0x07000000
+#define G80_TIC_0_Z_SOURCE__SHIFT 24
+#define G80_TIC_0_Y_SOURCE__MASK 0x00e00000
+#define G80_TIC_0_Y_SOURCE__SHIFT 21
+#define G80_TIC_0_X_SOURCE__MASK 0x001c0000
+#define G80_TIC_0_X_SOURCE__SHIFT 18
+#define G80_TIC_0_A_DATA_TYPE__MASK 0x00038000
+#define G80_TIC_0_A_DATA_TYPE__SHIFT 15
+#define G80_TIC_0_B_DATA_TYPE__MASK 0x00007000
+#define G80_TIC_0_B_DATA_TYPE__SHIFT 12
+#define G80_TIC_0_G_DATA_TYPE__MASK 0x00000e00
+#define G80_TIC_0_G_DATA_TYPE__SHIFT 9
+#define G80_TIC_0_R_DATA_TYPE__MASK 0x000001c0
+#define G80_TIC_0_R_DATA_TYPE__SHIFT 6
+#define G80_TIC_0_COMPONENTS_SIZES__MASK 0x0000003f
+#define G80_TIC_0_COMPONENTS_SIZES__SHIFT 0
+#define G80_TIC_0_COMPONENTS_SIZES_R32_G32_B32_A32 0x00000001
+#define GF100_TIC_0_COMPONENTS_SIZES_R32_G32_B32 0x00000002
+#define G80_TIC_0_COMPONENTS_SIZES_R16_G16_B16_A16 0x00000003
+#define G80_TIC_0_COMPONENTS_SIZES_R32_G32 0x00000004
+#define G80_TIC_0_COMPONENTS_SIZES_R32_B24G8 0x00000005
+#define G80_TIC_0_COMPONENTS_SIZES_X8B8G8R8 0x00000007
+#define G80_TIC_0_COMPONENTS_SIZES_A8B8G8R8 0x00000008
+#define G80_TIC_0_COMPONENTS_SIZES_A2B10G10R10 0x00000009
+#define G80_TIC_0_COMPONENTS_SIZES_R16_G16 0x0000000c
+#define G80_TIC_0_COMPONENTS_SIZES_G8R24 0x0000000d
+#define G80_TIC_0_COMPONENTS_SIZES_G24R8 0x0000000e
+#define G80_TIC_0_COMPONENTS_SIZES_R32 0x0000000f
+#define G80_TIC_0_COMPONENTS_SIZES_A4B4G4R4 0x00000012
+#define G80_TIC_0_COMPONENTS_SIZES_A5B5G5R1 0x00000013
+#define G80_TIC_0_COMPONENTS_SIZES_A1B5G5R5 0x00000014
+#define G80_TIC_0_COMPONENTS_SIZES_B5G6R5 0x00000015
+#define G80_TIC_0_COMPONENTS_SIZES_B6G5R5 0x00000016
+#define G80_TIC_0_COMPONENTS_SIZES_G8R8 0x00000018
+#define G80_TIC_0_COMPONENTS_SIZES_R16 0x0000001b
+#define G80_TIC_0_COMPONENTS_SIZES_Y8_VIDEO 0x0000001c
+#define G80_TIC_0_COMPONENTS_SIZES_R8 0x0000001d
+#define G80_TIC_0_COMPONENTS_SIZES_G4R4 0x0000001e
+#define G80_TIC_0_COMPONENTS_SIZES_R1 0x0000001f
+#define G80_TIC_0_COMPONENTS_SIZES_E5B9G9R9_SHAREDEXP 0x00000020
+#define G80_TIC_0_COMPONENTS_SIZES_BF10GF11RF11 0x00000021
+#define G80_TIC_0_COMPONENTS_SIZES_G8B8G8R8 0x00000022
+#define G80_TIC_0_COMPONENTS_SIZES_B8G8R8G8 0x00000023
+#define G80_TIC_0_COMPONENTS_SIZES_DXT1 0x00000024
+#define G80_TIC_0_COMPONENTS_SIZES_DXT23 0x00000025
+#define G80_TIC_0_COMPONENTS_SIZES_DXT45 0x00000026
+#define G80_TIC_0_COMPONENTS_SIZES_DXN1 0x00000027
+#define G80_TIC_0_COMPONENTS_SIZES_DXN2 0x00000028
+#define GF100_TIC_0_COMPONENTS_SIZES_BC6H_SF16 0x00000010
+#define GF100_TIC_0_COMPONENTS_SIZES_BC6H_UF16 0x00000011
+#define GF100_TIC_0_COMPONENTS_SIZES_BC7U 0x00000017
+#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGB 0x00000006
+#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGB_PTA 0x0000000a
+#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGBA 0x0000000b
+#define GK20A_TIC_0_COMPONENTS_SIZES_EAC 0x00000019
+#define GK20A_TIC_0_COMPONENTS_SIZES_EACX2 0x0000001a
+#define G80_TIC_0_COMPONENTS_SIZES_Z24S8 0x00000029
+#define G80_TIC_0_COMPONENTS_SIZES_X8Z24 0x0000002a
+#define G80_TIC_0_COMPONENTS_SIZES_S8Z24 0x0000002b
+#define G80_TIC_0_COMPONENTS_SIZES_X4V4Z24__COV4R4V 0x0000002c
+#define G80_TIC_0_COMPONENTS_SIZES_X4V4Z24__COV8R8V 0x0000002d
+#define G80_TIC_0_COMPONENTS_SIZES_V8Z24__COV4R12V 0x0000002e
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32 0x0000002f
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X24S8 0x00000030
+#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV4R4V 0x00000031
+#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV8R8V 0x00000032
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV4R4V 0x00000033
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV8R8V 0x00000034
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV4R4V 0x00000035
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV8R8V 0x00000036
+#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV4R12V 0x00000037
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV4R12V 0x00000038
+#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV4R12V 0x00000039
+#define G200_TIC_0_COMPONENTS_SIZES_Z16 0x0000003a
+#define G200_TIC_0_COMPONENTS_SIZES_V8Z24__COV8R24V 0x0000003b
+#define G200_TIC_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV8R24V 0x0000003c
+#define G200_TIC_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV8R24V 0x0000003d
+#define G200_TIC_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV8R24V 0x0000003e
+#define G80_TIC_0_COMPONENTS_SIZES__MASK 0x0000003f
+#define G80_TIC_0_COMPONENTS_SIZES__SHIFT 0
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_4X4 0x00000000
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_5X4 0x00000010
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_5X5 0x00000001
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_6X5 0x00000011
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_6X6 0x00000002
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X5 0x00000015
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X6 0x00000012
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X8 0x00000004
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X5 0x00000016
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X6 0x00000017
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X8 0x00000013
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X10 0x00000005
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_12X10 0x00000014
+#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_12X12 0x00000006
+
+#define G80_TIC_1 0x00000004
+#define G80_TIC_1_OFFSET_LOWER__MASK 0xffffffff
+#define G80_TIC_1_OFFSET_LOWER__SHIFT 0
+
+#define G80_TIC_2 0x00000008
+#define G80_TIC_2_OFFSET_UPPER__MASK 0x000000ff
+#define G80_TIC_2_OFFSET_UPPER__SHIFT 0
+#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_LSB__MASK 0x00000300
+#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_LSB__SHIFT 8
+#define G80_TIC_2_SRGB_CONVERSION 0x00000400
+#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_MSB 0x00000800
+#define G80_TIC_2_LOD_ANISO_QUALITY_2 0x00001000
+#define G80_TIC_2_COLOR_KEY_OP 0x00002000
+#define G80_TIC_2_TEXTURE_TYPE__MASK 0x0003c000
+#define G80_TIC_2_TEXTURE_TYPE__SHIFT 14
+#define G80_TIC_2_TEXTURE_TYPE_ONE_D 0x00000000
+#define G80_TIC_2_TEXTURE_TYPE_TWO_D 0x00004000
+#define G80_TIC_2_TEXTURE_TYPE_THREE_D 0x00008000
+#define G80_TIC_2_TEXTURE_TYPE_CUBEMAP 0x0000c000
+#define G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY 0x00010000
+#define G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY 0x00014000
+#define G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER 0x00018000
+#define G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP 0x0001c000
+#define G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY 0x00020000
+#define G80_TIC_2_LAYOUT__MASK 0x00040000
+#define G80_TIC_2_LAYOUT__SHIFT 18
+#define G80_TIC_2_LAYOUT_BLOCKLINEAR 0x00000000
+#define G80_TIC_2_LAYOUT_PITCH 0x00040000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MASK 0x00380000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__SHIFT 19
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MIN 0x00000000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MAX 0x00000000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_ONE 0x00000000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_TWO 0x00080000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_FOUR 0x00100000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_EIGHT 0x00180000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_SIXTEEN 0x00200000
+#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_THIRTYTWO 0x00280000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT__MASK 0x01c00000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT__SHIFT 22
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_ONE 0x00000000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_TWO 0x00400000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_FOUR 0x00800000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_EIGHT 0x00c00000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_SIXTEEN 0x01000000
+#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_THIRTYTWO 0x01400000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH__MASK 0x0e000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH__SHIFT 25
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_ONE 0x00000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_TWO 0x02000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_FOUR 0x04000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_EIGHT 0x06000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_SIXTEEN 0x08000000
+#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_THIRTYTWO 0x0a000000
+#define G80_TIC_2_SECTOR_PROMOTION__MASK 0x30000000
+#define G80_TIC_2_SECTOR_PROMOTION__SHIFT 28
+#define G80_TIC_2_SECTOR_PROMOTION_NO_PROMOTION 0x00000000
+#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_2_V 0x10000000
+#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_2_H 0x20000000
+#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_4 0x30000000
+#define G80_TIC_2_BORDER_SOURCE__MASK 0x40000000
+#define G80_TIC_2_BORDER_SOURCE__SHIFT 30
+#define G80_TIC_2_BORDER_SOURCE_TEXTURE 0x00000000
+#define G80_TIC_2_BORDER_SOURCE_COLOR 0x40000000
+#define G80_TIC_2_NORMALIZED_COORDS 0x80000000
+
+#define G80_TIC_3 0x0000000c
+#define G80_TIC_3_PITCH__MASK 0x000fffff
+#define G80_TIC_3_PITCH__SHIFT 0
+#define G80_TIC_3_LOD_ANISO_QUALITY__MASK 0x00100000
+#define G80_TIC_3_LOD_ANISO_QUALITY__SHIFT 20
+#define G80_TIC_3_LOD_ANISO_QUALITY_LOW 0x00000000
+#define G80_TIC_3_LOD_ANISO_QUALITY_HIGH 0x00100000
+#define G80_TIC_3_LOD_ISO_QUALITY__MASK 0x00200000
+#define G80_TIC_3_LOD_ISO_QUALITY__SHIFT 21
+#define G80_TIC_3_LOD_ISO_QUALITY_LOW 0x00000000
+#define G80_TIC_3_LOD_ISO_QUALITY_HIGH 0x00200000
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER__MASK 0x00c00000
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER__SHIFT 22
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_NONE 0x00000000
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_ONE 0x00400000
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_TWO 0x00800000
+#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_SQRT 0x00c00000
+#define G80_TIC_3_ANISO_SPREAD_SCALE__MASK 0x1f000000
+#define G80_TIC_3_ANISO_SPREAD_SCALE__SHIFT 24
+#define G80_TIC_3_USE_HEADER_OPT_CONTROL 0x20000000
+#define G84_TIC_3_ANISO_CLAMP_AT_MAX_LOD 0x40000000
+#define G84_TIC_3_ANISO_POW2 0x80000000
+
+#define G80_TIC_4 0x00000010
+#define G80_TIC_4_WIDTH__MASK 0x3fffffff
+#define G80_TIC_4_WIDTH__SHIFT 0
+#define G80_TIC_4_DEPTH_TEXTURE 0x40000000
+#define G84_TIC_4_USE_TEXTURE_HEADER_V2 0x80000000
+
+#define G80_TIC_5 0x00000014
+#define G80_TIC_5_MAP_MIP_LEVEL__MASK 0xf0000000
+#define G80_TIC_5_MAP_MIP_LEVEL__SHIFT 28
+#define G80_TIC_5_DEPTH__MASK 0x0fff0000
+#define G80_TIC_5_DEPTH__SHIFT 16
+#define G80_TIC_5_HEIGHT__MASK 0x0000ffff
+#define G80_TIC_5_HEIGHT__SHIFT 0
+
+#define G80_TIC_6 0x00000018
+#define G80_TIC_6_TRILIN_OPT__MASK 0x0000001f
+#define G80_TIC_6_TRILIN_OPT__SHIFT 0
+#define G80_TIC_6_MIP_LOD_BIAS__MASK 0x0003ffe0
+#define G80_TIC_6_MIP_LOD_BIAS__SHIFT 5
+#define G80_TIC_6_MIP_LOD_BIAS__RADIX 0x00000008
+#define G80_TIC_6_ANISO_BIAS__MASK 0x00780000
+#define G80_TIC_6_ANISO_BIAS__SHIFT 19
+#define G80_TIC_6_ANISO_BIAS__RADIX 0x00000004
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC__MASK 0x01800000
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC__SHIFT 23
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_HALF 0x00000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_ONE 0x00800000
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_TWO 0x01000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_MAX 0x01800000
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC__MASK 0x06000000
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC__SHIFT 25
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_HALF 0x00000000
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_ONE 0x02000000
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_TWO 0x04000000
+#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_MAX 0x06000000
+#define G80_TIC_6_MAX_ANISOTROPY__MASK 0x38000000
+#define G80_TIC_6_MAX_ANISOTROPY__SHIFT 27
+#define G80_TIC_6_MAX_ANISOTROPY_1_TO_1 0x00000000
+#define G80_TIC_6_MAX_ANISOTROPY_2_TO_1 0x08000000
+#define G80_TIC_6_MAX_ANISOTROPY_4_TO_1 0x10000000
+#define G80_TIC_6_MAX_ANISOTROPY_6_TO_1 0x18000000
+#define G80_TIC_6_MAX_ANISOTROPY_8_TO_1 0x20000000
+#define G80_TIC_6_MAX_ANISOTROPY_10_TO_1 0x28000000
+#define G80_TIC_6_MAX_ANISOTROPY_12_TO_1 0x30000000
+#define G80_TIC_6_MAX_ANISOTROPY_16_TO_1 0x38000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER__MASK 0xc0000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER__SHIFT 30
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_NONE 0x00000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_CONST_ONE 0x40000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO 0x80000000
+#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_SQRT 0xc0000000
+
+#define G80_TIC_7 0x0000001c
+#define G80_TIC_7_COLOR_KEY_VALUE__MASK 0xffffffff
+#define G80_TIC_7_COLOR_KEY_VALUE__SHIFT 0
+
+#define G84_TIC_7 0x0000001c
+#define G84_TIC_7_RES_VIEW_MIN_MIP_LEVEL__MASK 0x0000000f
+#define G84_TIC_7_RES_VIEW_MIN_MIP_LEVEL__SHIFT 0
+#define G84_TIC_7_RES_VIEW_MAX_MIP_LEVEL__MASK 0x000000f0
+#define G84_TIC_7_RES_VIEW_MAX_MIP_LEVEL__SHIFT 4
+#define G84_TIC_7_HEIGHT_MSB 0x00000100
+#define G84_TIC_7_MULTI_SAMPLE_COUNT__MASK 0x0000f000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT__SHIFT 12
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_1X1 0x00000000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X1 0x00001000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2 0x00002000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_4X2 0x00003000
+#define GT215_TIC_7_MULTI_SAMPLE_COUNT_4X2_D3D 0x00004000
+#define GT215_TIC_7_MULTI_SAMPLE_COUNT_2X1_D3D 0x00005000
+#define GF100_TIC_7_MULTI_SAMPLE_COUNT_4X4 0x00006000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2_VC_4 0x00008000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2_VC_12 0x00009000
+#define G84_TIC_7_MULTI_SAMPLE_COUNT_4X2_VC_8 0x0000a000
+#define GF100_TIC_7_MULTI_SAMPLE_COUNT_4X2_VC_24 0x0000b000
+#define G84_TIC_7_MIN_LOD_CLAMP__MASK 0x0fff0000
+#define G84_TIC_7_MIN_LOD_CLAMP__SHIFT 16
+#define G84_TIC_7_MIN_LOD_CLAMP__RADIX 0x00000008
+#define G84_TIC_7_DEPTH_MSB__MASK 0x70000000
+#define G84_TIC_7_DEPTH_MSB__SHIFT 28
+
+#define G80_TSC__SIZE 0x00000020
+#define G80_TSC_0 0x00000000
+#define G80_TSC_0_ADDRESS_U__MASK 0x00000007
+#define G80_TSC_0_ADDRESS_U__SHIFT 0
+#define G80_TSC_0_ADDRESS_V__MASK 0x00000038
+#define G80_TSC_0_ADDRESS_V__SHIFT 3
+#define G80_TSC_0_ADDRESS_P__MASK 0x000001c0
+#define G80_TSC_0_ADDRESS_P__SHIFT 6
+#define G80_TSC_0_DEPTH_COMPARE 0x00000200
+#define G80_TSC_0_DEPTH_COMPARE_FUNC__MASK 0x00001c00
+#define G80_TSC_0_DEPTH_COMPARE_FUNC__SHIFT 10
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_NEVER 0x00000000
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_LESS 0x00000400
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_EQUAL 0x00000800
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_LEQUAL 0x00000c00
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_GREATER 0x00001000
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_NOTEQUAL 0x00001400
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_GEQUAL 0x00001800
+#define G80_TSC_0_DEPTH_COMPARE_FUNC_ALWAYS 0x00001c00
+#define G80_TSC_0_SRGB_CONVERSION 0x00002000
+#define G80_TSC_0_FONT_FILTER_WIDTH__MASK 0x0001c000
+#define G80_TSC_0_FONT_FILTER_WIDTH__SHIFT 14
+#define G80_TSC_0_FONT_FILTER_HEIGHT__MASK 0x000e0000
+#define G80_TSC_0_FONT_FILTER_HEIGHT__SHIFT 17
+#define G80_TSC_0_MAX_ANISOTROPY__MASK 0x00700000
+#define G80_TSC_0_MAX_ANISOTROPY__SHIFT 20
+#define G80_TSC_0_MAX_ANISOTROPY_1_TO_1 0x00000000
+#define G80_TSC_0_MAX_ANISOTROPY_2_TO_1 0x00100000
+#define G80_TSC_0_MAX_ANISOTROPY_4_TO_1 0x00200000
+#define G80_TSC_0_MAX_ANISOTROPY_6_TO_1 0x00300000
+#define G80_TSC_0_MAX_ANISOTROPY_8_TO_1 0x00400000
+#define G80_TSC_0_MAX_ANISOTROPY_10_TO_1 0x00500000
+#define G80_TSC_0_MAX_ANISOTROPY_12_TO_1 0x00600000
+#define G80_TSC_0_MAX_ANISOTROPY_16_TO_1 0x00700000
+
+#define G80_TSC_1 0x00000004
+#define G80_TSC_1_MAG_FILTER__MASK 0x00000003
+#define G80_TSC_1_MAG_FILTER__SHIFT 0
+#define G80_TSC_1_MAG_FILTER_NEAREST 0x00000001
+#define G80_TSC_1_MAG_FILTER_LINEAR 0x00000002
+#define G80_TSC_1_MIN_FILTER__MASK 0x00000030
+#define G80_TSC_1_MIN_FILTER__SHIFT 4
+#define G80_TSC_1_MIN_FILTER_NEAREST 0x00000010
+#define G80_TSC_1_MIN_FILTER_LINEAR 0x00000020
+#define G80_TSC_1_MIP_FILTER__MASK 0x000000c0
+#define G80_TSC_1_MIP_FILTER__SHIFT 6
+#define G80_TSC_1_MIP_FILTER_NONE 0x00000040
+#define G80_TSC_1_MIP_FILTER_NEAREST 0x00000080
+#define G80_TSC_1_MIP_FILTER_LINEAR 0x000000c0
+#define GK104_TSC_1_CUBEMAP_INTERFACE_FILTERING 0x00000200
+#define G80_TSC_1_MIP_LOD_BIAS__MASK 0x01fff000
+#define G80_TSC_1_MIP_LOD_BIAS__SHIFT 12
+#define G80_TSC_1_MIP_LOD_BIAS__RADIX 0x00000008
+#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION__MASK 0x02000000
+#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION__SHIFT 25
+#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION_USE_HEADER_SETTING 0x00000000
+#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION_FORCE_UNNORMALIZED_COORDS 0x02000000
+#define G80_TSC_1_TRILIN_OPT__MASK 0x7c000000
+#define G80_TSC_1_TRILIN_OPT__SHIFT 26
+
+#define G80_TSC_2 0x00000008
+#define G80_TSC_2_MIN_LOD_CLAMP__MASK 0x00000fff
+#define G80_TSC_2_MIN_LOD_CLAMP__SHIFT 0
+#define G80_TSC_2_MIN_LOD_CLAMP__RADIX 0x00000008
+#define G80_TSC_2_MAX_LOD_CLAMP__MASK 0x00fff000
+#define G80_TSC_2_MAX_LOD_CLAMP__SHIFT 12
+#define G80_TSC_2_MAX_LOD_CLAMP__RADIX 0x00000008
+#define G80_TSC_2_SRGB_BORDER_COLOR_R__MASK 0xff000000
+#define G80_TSC_2_SRGB_BORDER_COLOR_R__SHIFT 24
+
+#define G80_TSC_3 0x0000000c
+#define G80_TSC_3_SRGB_BORDER_COLOR_G__MASK 0x000ff000
+#define G80_TSC_3_SRGB_BORDER_COLOR_G__SHIFT 12
+#define G80_TSC_3_SRGB_BORDER_COLOR_B__MASK 0x0ff00000
+#define G80_TSC_3_SRGB_BORDER_COLOR_B__SHIFT 20
+
+#define G80_TSC_4 0x00000010
+#define G80_TSC_4_BORDER_COLOR_R__MASK 0xffffffff
+#define G80_TSC_4_BORDER_COLOR_R__SHIFT 0
+
+#define G80_TSC_5 0x00000014
+#define G80_TSC_5_BORDER_COLOR_G__MASK 0xffffffff
+#define G80_TSC_5_BORDER_COLOR_G__SHIFT 0
+
+#define G80_TSC_6 0x00000018
+#define G80_TSC_6_BORDER_COLOR_B__MASK 0xffffffff
+#define G80_TSC_6_BORDER_COLOR_B__SHIFT 0
+
+#define G80_TSC_7 0x0000001c
+#define G80_TSC_7_BORDER_COLOR_A__MASK 0xffffffff
+#define G80_TSC_7_BORDER_COLOR_A__SHIFT 0
+
+
+#endif /* G80_TEXTURE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
index 6d23fd66945..04488d6d0a6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -270,13 +270,11 @@ nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
}
void
-nv50_launch_grid(struct pipe_context *pipe,
- const uint *block_layout, const uint *grid_layout,
- uint32_t label, const void *input)
+nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
- unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2];
+ unsigned block_size = info->block[0] * info->block[1] * info->block[2];
struct nv50_program *cp = nv50->compprog;
bool ret;
@@ -286,10 +284,10 @@ nv50_launch_grid(struct pipe_context *pipe,
return;
}
- nv50_compute_upload_input(nv50, input);
+ nv50_compute_upload_input(nv50, info->input);
BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
- PUSH_DATA (push, nv50_compute_find_symbol(nv50, label));
+ PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
@@ -298,14 +296,14 @@ nv50_launch_grid(struct pipe_context *pipe,
/* grid/block setup */
BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
- PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]);
- PUSH_DATA (push, block_layout[2]);
+ PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
+ PUSH_DATA (push, info->block[2]);
BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
PUSH_DATA (push, 1 << 16 | block_size);
BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
- PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]);
+ PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 342ec96d62c..2620d03b999 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -153,6 +153,7 @@ struct nv50_context {
uint32_t textures_coherent[3];
struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
unsigned num_samplers[3];
+ bool seamless_cube_map;
uint8_t num_so_targets;
uint8_t so_targets_dirty;
@@ -322,7 +323,6 @@ nv98_video_buffer_create(struct pipe_context *pipe,
/* nv50_compute.c */
void
-nv50_launch_grid(struct pipe_context *, const uint *, const uint *,
- uint32_t, const void *);
+nv50_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h
deleted file mode 100644
index aad2a851691..00000000000
--- a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h
+++ /dev/null
@@ -1,263 +0,0 @@
-#ifndef NV50_DEFS_XML
-#define NV50_DEFS_XML
-
-/* Autogenerated file, DO NOT EDIT manually!
-
-This file was generated by the rules-ng-ng headergen tool in this git repository:
-http://github.com/envytools/envytools/
-git clone https://github.com/envytools/envytools.git
-
-The rules-ng-ng source files this header was generated from are:
-- rnndb/g80_defs.xml ( 18175 bytes, from 2014-09-25 06:32:11)
-- rnndb/copyright.xml ( 6452 bytes, from 2013-05-14 03:57:49)
-- rnndb/nvchipsets.xml ( 2759 bytes, from 2014-10-05 01:51:02)
-
-Copyright (C) 2006-2014 by the following authors:
-- Artur Huillet <[email protected]> (ahuillet)
-- Ben Skeggs (darktama, darktama_)
-- B. R. <[email protected]> (koala_br)
-- Carlos Martin <[email protected]> (carlosmn)
-- Christoph Bumiller <[email protected]> (calim, chrisbmr)
-- Dawid Gajownik <[email protected]> (gajownik)
-- Dmitry Baryshkov
-- Dmitry Eremin-Solenikov <[email protected]> (lumag)
-- EdB <[email protected]> (edb_)
-- Erik Waling <[email protected]> (erikwaling)
-- Francisco Jerez <[email protected]> (curro)
-- imirkin <[email protected]> (imirkin)
-- jb17bsome <[email protected]> (jb17bsome)
-- Jeremy Kolb <[email protected]> (kjeremy)
-- Laurent Carlier <[email protected]> (lordheavy)
-- Luca Barbieri <[email protected]> (lb, lb1)
-- Maarten Maathuis <[email protected]> (stillunknown)
-- Marcin KoÅ›cielnicki <[email protected]> (mwk, koriakin)
-- Mark Carey <[email protected]> (careym)
-- Matthieu Castet <[email protected]> (mat-c)
-- nvidiaman <[email protected]> (nvidiaman)
-- Patrice Mandin <[email protected]> (pmandin, pmdata)
-- Pekka Paalanen <[email protected]> (pq, ppaalanen)
-- Peter Popov <[email protected]> (ironpeter)
-- Richard Hughes <[email protected]> (hughsient)
-- Rudi Cilibrasi <[email protected]> (cilibrar)
-- Serge Martin
-- Simon Raffeiner
-- Stephane Loeuillet <[email protected]> (leroutier)
-- Stephane Marchesin <[email protected]> (marcheu)
-- sturmflut <[email protected]> (sturmflut)
-- Sylvain Munaut <[email protected]>
-- Victor Stinner <[email protected]> (haypo)
-- Wladmir van der Laan <[email protected]> (miathan6)
-- Younes Manton <[email protected]> (ymanton)
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-
-#define NV50_VSTATUS_IDLE 0x00000000
-#define NV50_VSTATUS_BUSY 0x00000001
-#define NV50_VSTATUS_UNK2 0x00000002
-#define NV50_VSTATUS_WAITING 0x00000003
-#define NV50_VSTATUS_BLOCKED 0x00000005
-#define NV50_VSTATUS_FAULTED 0x00000006
-#define NV50_VSTATUS_PAUSED 0x00000007
-#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c
-#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d
-#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0
-#define NV50_SURFACE_FORMAT_RGBA32_SINT 0x000000c1
-#define NV50_SURFACE_FORMAT_RGBA32_UINT 0x000000c2
-#define NV50_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3
-#define NV50_SURFACE_FORMAT_RGBX32_SINT 0x000000c4
-#define NV50_SURFACE_FORMAT_RGBX32_UINT 0x000000c5
-#define NV50_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6
-#define NV50_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7
-#define NV50_SURFACE_FORMAT_RGBA16_SINT 0x000000c8
-#define NV50_SURFACE_FORMAT_RGBA16_UINT 0x000000c9
-#define NV50_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca
-#define NV50_SURFACE_FORMAT_RG32_FLOAT 0x000000cb
-#define NV50_SURFACE_FORMAT_RG32_SINT 0x000000cc
-#define NV50_SURFACE_FORMAT_RG32_UINT 0x000000cd
-#define NV50_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce
-#define NV50_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf
-#define NV50_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0
-#define NV50_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1
-#define NV50_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2
-#define NV50_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5
-#define NV50_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6
-#define NV50_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7
-#define NV50_SURFACE_FORMAT_RGBA8_SINT 0x000000d8
-#define NV50_SURFACE_FORMAT_RGBA8_UINT 0x000000d9
-#define NV50_SURFACE_FORMAT_RG16_UNORM 0x000000da
-#define NV50_SURFACE_FORMAT_RG16_SNORM 0x000000db
-#define NV50_SURFACE_FORMAT_RG16_SINT 0x000000dc
-#define NV50_SURFACE_FORMAT_RG16_UINT 0x000000dd
-#define NV50_SURFACE_FORMAT_RG16_FLOAT 0x000000de
-#define NV50_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df
-#define NV50_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0
-#define NV50_SURFACE_FORMAT_R32_SINT 0x000000e3
-#define NV50_SURFACE_FORMAT_R32_UINT 0x000000e4
-#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5
-#define NV50_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6
-#define NV50_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7
-#define NV50_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8
-#define NV50_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9
-#define NV50_SURFACE_FORMAT_RG8_UNORM 0x000000ea
-#define NV50_SURFACE_FORMAT_RG8_SNORM 0x000000eb
-#define NV50_SURFACE_FORMAT_RG8_SINT 0x000000ec
-#define NV50_SURFACE_FORMAT_RG8_UINT 0x000000ed
-#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee
-#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef
-#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0
-#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1
-#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2
-#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3
-#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4
-#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5
-#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6
-#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7
-#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8
-#define NV50_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9
-#define NV50_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa
-#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb
-#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc
-#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd
-#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe
-#define NV50_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff
-#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a
-#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013
-#define NV50_ZETA_FORMAT_S8_Z24_UNORM 0x00000014
-#define NV50_ZETA_FORMAT_Z24_X8_UNORM 0x00000015
-#define NV50_ZETA_FORMAT_Z24_S8_UNORM 0x00000016
-#define NV50_ZETA_FORMAT_Z24_C8_UNORM 0x00000018
-#define NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019
-#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d
-#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e
-#define NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f
-#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002
-#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003
-#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004
-#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008
-#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009
-#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a
-#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b
-#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c
-#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d
-#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e
-#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f
-#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013
-#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015
-#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018
-#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a
-#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b
-#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c
-#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d
-#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e
-#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f
-#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020
-#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021
-#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024
-#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027
-#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028
-#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029
-#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e
-#define NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f
-#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030
-#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031
-#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032
-#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033
-#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034
-#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035
-#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036
-#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037
-#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038
-#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039
-#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a
-#define NV50_PGRAPH_DATA_ERROR_INVALID_OPERATION 0x00000003
-#define NV50_PGRAPH_DATA_ERROR_INVALID_VALUE 0x00000004
-#define NV50_PGRAPH_DATA_ERROR_INVALID_ENUM 0x00000005
-#define NV50_PGRAPH_DATA_ERROR_INVALID_OBJECT 0x00000008
-#define NV50_PGRAPH_DATA_ERROR_READ_ONLY_OBJECT 0x00000009
-#define NV50_PGRAPH_DATA_ERROR_SUPERVISOR_OBJECT 0x0000000a
-#define NV50_PGRAPH_DATA_ERROR_INVALID_ADDRESS_ALIGNMENT 0x0000000b
-#define NV50_PGRAPH_DATA_ERROR_INVALID_BITFIELD 0x0000000c
-#define NV50_PGRAPH_DATA_ERROR_BEGIN_END_ACTIVE 0x0000000d
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_BACK_OVER_LIMIT 0x0000000e
-#define NV50_PGRAPH_DATA_ERROR_VIEWPORT_ID_NEEDS_GP 0x0000000f
-#define NV50_PGRAPH_DATA_ERROR_RT_DOUBLE_BIND 0x00000010
-#define NV50_PGRAPH_DATA_ERROR_RT_TYPES_MISMATCH 0x00000011
-#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_ZETA 0x00000012
-#define NV50_PGRAPH_DATA_ERROR_FP_TOO_FEW_REGS 0x00000015
-#define NV50_PGRAPH_DATA_ERROR_ZETA_FORMAT_CSAA_MISMATCH 0x00000016
-#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_MSAA 0x00000017
-#define NV50_PGRAPH_DATA_ERROR_FP_INTERPOLANT_START_OVER_LIMIT 0x00000018
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_LAYER_OVER_LIMIT 0x00000019
-#define NV50_PGRAPH_DATA_ERROR_RT_INVALID_ALIGNMENT 0x0000001a
-#define NV50_PGRAPH_DATA_ERROR_SAMPLER_OVER_LIMIT 0x0000001b
-#define NV50_PGRAPH_DATA_ERROR_TEXTURE_OVER_LIMIT 0x0000001c
-#define NV50_PGRAPH_DATA_ERROR_GP_TOO_MANY_OUTPUTS 0x0000001e
-#define NV50_PGRAPH_DATA_ERROR_RT_BPP128_WITH_MS8 0x0000001f
-#define NV50_PGRAPH_DATA_ERROR_Z_OUT_OF_BOUNDS 0x00000021
-#define NV50_PGRAPH_DATA_ERROR_XY_OUT_OF_BOUNDS 0x00000023
-#define NV50_PGRAPH_DATA_ERROR_VP_ZERO_INPUTS 0x00000024
-#define NV50_PGRAPH_DATA_ERROR_CP_MORE_PARAMS_THAN_SHARED 0x00000027
-#define NV50_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_STRIPED 0x00000028
-#define NV50_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_PACKED 0x00000029
-#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_WARPS 0x0000002a
-#define NV50_PGRAPH_DATA_ERROR_CP_BLOCK_SIZE_MISMATCH 0x0000002b
-#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_LOCAL_WARPS 0x0000002c
-#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_STACK_WARPS 0x0000002d
-#define NV50_PGRAPH_DATA_ERROR_CP_NO_BLOCKDIM_LATCH 0x0000002e
-#define NV50_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH 0x00000031
-#define NV50_PGRAPH_DATA_ERROR_ENG2D_OPERATION_ILLEGAL_FOR_DST_FORMAT 0x00000033
-#define NV50_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH_B 0x00000034
-#define NV50_PGRAPH_DATA_ERROR_PRIMITIVE_ID_NEEDS_GP 0x0000003f
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_VIEWPORT_OVER_LIMIT 0x00000044
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_FRONT_OVER_LIMIT 0x00000045
-#define NV50_PGRAPH_DATA_ERROR_LAYER_ID_NEEDS_GP 0x00000046
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_CLIP_OVER_LIMIT 0x00000047
-#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_PTSZ_OVER_LIMIT 0x00000048
-#define NV50_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_IN 0x00000051
-#define NV50_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_OUT 0x00000053
-#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_ZETA_GF100 0x00000098
-#define NV50_PGRAPH_DATA_ERROR_ENG2D_UNALIGNED_PITCH_GF100 0x000000a5
-#define NV50_CG_IDLE_TIMEOUT__MASK 0x0000003f
-#define NV50_CG_IDLE_TIMEOUT__SHIFT 0
-#define NV50_CG_IDLE_TIMEOUT_ENABLE 0x00000040
-#define NV50_CG_INTERFACE_REENABLE_TIME__MASK 0x000f0000
-#define NV50_CG_INTERFACE_REENABLE_TIME__SHIFT 16
-#define NV50_CG_THROTTLE_DUTY_M1__MASK 0x00f00000
-#define NV50_CG_THROTTLE_DUTY_M1__SHIFT 20
-#define NV50_CG_DELAY__MASK 0x0f000000
-#define NV50_CG_DELAY__SHIFT 24
-#define NV50_CG_CLOCK_THROTTLE_ENABLE 0x10000000
-#define NV50_CG_THROTTLE_MODE__MASK 0x20000000
-#define NV50_CG_THROTTLE_MODE__SHIFT 29
-#define NV50_CG_THROTTLE_MODE_AUTO 0x00000000
-#define NV50_CG_THROTTLE_MODE_MANUAL 0x20000000
-#define NV50_CG_INTERFACE_THROTTLE_ENABLE 0x40000000
-#define NV50_QUERY__SIZE 0x00000010
-#define NV50_QUERY_COUNTER 0x00000000
-
-#define NV50_QUERY_RES 0x00000004
-
-#define NV50_QUERY_TIME 0x00000008
-
-
-#endif /* NV50_DEFS_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index 49a93bf1d91..717067cf2f7 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -27,8 +27,8 @@
# include "nv50/nv50_screen.h"
# include "nv50/nv50_3d.xml.h"
#endif
-#include "nv50/nv50_texture.xml.h"
-#include "nv50/nv50_defs.xml.h"
+#include "nv50/g80_texture.xml.h"
+#include "nv50/g80_defs.xml.h"
#include "pipe/p_defines.h"
@@ -39,10 +39,8 @@
* C: render target (color), blendable only on nvc0
* D: scanout/display target, blendable
* Z: depth/stencil
- * V: vertex fetch
* I: image / surface, implies T
*/
-#define U_V PIPE_BIND_VERTEX_BUFFER
#define U_T PIPE_BIND_SAMPLER_VIEW
#define U_I PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE
#define U_TR PIPE_BIND_RENDER_TARGET | U_T
@@ -51,38 +49,273 @@
#define U_IB PIPE_BIND_BLENDABLE | U_IR
#define U_TD PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB
#define U_TZ PIPE_BIND_DEPTH_STENCIL | U_T
-#define U_TV U_V | U_T
-#define U_TRV U_V | U_TR
-#define U_IRV U_V | U_IR
-#define U_TBV U_V | U_TB
-#define U_IBV U_V | U_IB
-#define U_TDV U_V | U_TD
#if NOUVEAU_DRIVER == 0xc0
# define U_TC U_TB
# define U_IC U_IB
-# define U_TCV U_TBV
-# define U_ICV U_IBV
# define U_t U_T
-# define U_tV U_TV
#else
# define U_TC U_TR
# define U_IC U_IR
-# define U_TCV U_TRV
-# define U_ICV U_IRV
# define U_t 0
-# define U_tV U_V
#endif
-#define NV50_ZETA_FORMAT_NONE 0
-#define NV50_SURFACE_FORMAT_NONE 0
+#define G80_ZETA_FORMAT_NONE 0
+#define G80_SURFACE_FORMAT_NONE 0
-/* for vertex buffers: */
-#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8
-#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16
-#define NV50_TIC_0_FMT_32_32_32 NVC0_TIC_0_FMT_32_32_32
-#define NV50_TIC_0_FMT_BPTC NVC0_TIC_0_FMT_BPTC
-#define NV50_TIC_0_FMT_BPTC_FLOAT NVC0_TIC_0_FMT_BPTC_FLOAT
-#define NV50_TIC_0_FMT_BPTC_UFLOAT NVC0_TIC_0_FMT_BPTC_UFLOAT
+#define SF_A(sz) G80_TIC_0_COMPONENTS_SIZES_##sz
+#define SF_B(sz) G200_TIC_0_COMPONENTS_SIZES_##sz
+#define SF_C(sz) GF100_TIC_0_COMPONENTS_SIZES_##sz
+#define SF(c, pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \
+ [PIPE_FORMAT_##pf] = { \
+ sf, { \
+ SF_##c(sz), \
+ G80_TIC_TYPE_##t0, \
+ G80_TIC_TYPE_##t1, \
+ G80_TIC_TYPE_##t2, \
+ G80_TIC_TYPE_##t3, \
+ G80_TIC_SOURCE_##r, \
+ G80_TIC_SOURCE_##g, \
+ G80_TIC_SOURCE_##b, \
+ G80_TIC_SOURCE_##a, \
+ }, U_##u \
+ }
+
+#define C4(c, p, n, r, g, b, a, t, s, u) \
+ SF(c, p, G80_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u)
+
+#define ZX(c, p, n, r, g, b, a, t, s, u) \
+ SF(c, p, G80_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
+#define ZS(c, p, n, r, g, b, a, t, s, u) \
+ SF(c, p, G80_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
+#define SZ(c, p, n, r, g, b, a, t, s, u) \
+ SF(c, p, G80_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u)
+#define SX(c, p, r, s, u) \
+ SF(c, p, G80_ZETA_FORMAT_NONE, \
+ r, r, r, r, UINT, UINT, UINT, UINT, s, u)
+
+#define F3(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, g, b, ONE_FLOAT, t, s, u)
+#define I3(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, g, b, ONE_INT, t, s, u)
+
+#define F2(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, g, ZERO, ONE_FLOAT, t, s, u)
+#define I2(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, g, ZERO, ONE_INT, t, s, u)
+
+#define F1(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u)
+#define I1(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, r, ZERO, ZERO, ONE_INT, t, s, u)
+
+#define A1(c, p, n, r, g, b, a, t, s, u) \
+ C4(c, p, n, ZERO, ZERO, ZERO, a, t, s, u)
+
+#if NOUVEAU_DRIVER == 0xc0
+const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
+#else
+const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
+#endif
+{
+ C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM, A8B8G8R8, TD),
+ F3(A, B8G8R8X8_UNORM, BGRX8_UNORM, B, G, R, xx, UNORM, A8B8G8R8, TD),
+ C4(A, B8G8R8A8_SRGB, BGRA8_SRGB, B, G, R, A, UNORM, A8B8G8R8, TD),
+ F3(A, B8G8R8X8_SRGB, BGRX8_SRGB, B, G, R, xx, UNORM, A8B8G8R8, TD),
+ C4(A, R8G8B8A8_UNORM, RGBA8_UNORM, R, G, B, A, UNORM, A8B8G8R8, IB),
+ F3(A, R8G8B8X8_UNORM, RGBX8_UNORM, R, G, B, xx, UNORM, A8B8G8R8, TB),
+ C4(A, R8G8B8A8_SRGB, RGBA8_SRGB, R, G, B, A, UNORM, A8B8G8R8, TB),
+ F3(A, R8G8B8X8_SRGB, RGBX8_SRGB, R, G, B, xx, UNORM, A8B8G8R8, TB),
+
+ ZX(B, Z16_UNORM, Z16_UNORM, R, R, R, xx, UNORM, Z16, TZ),
+ ZX(A, Z32_FLOAT, Z32_FLOAT, R, R, R, xx, FLOAT, ZF32, TZ),
+ ZX(A, Z24X8_UNORM, Z24_X8_UNORM, R, R, R, xx, UNORM, X8Z24, TZ),
+ SZ(A, X8Z24_UNORM, S8_Z24_UNORM, G, G, G, xx, UNORM, Z24S8, TZ),
+ ZS(A, Z24_UNORM_S8_UINT, Z24_S8_UNORM, R, R, R, xx, UNORM, S8Z24, TZ),
+ SZ(A, S8_UINT_Z24_UNORM, S8_Z24_UNORM, G, G, G, xx, UNORM, Z24S8, TZ),
+ ZS(A, Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, R, R, R, xx, FLOAT, ZF32_X24S8, TZ),
+
+ SX(A, S8_UINT, R, R8, T),
+ SX(A, X24S8_UINT, G, S8Z24, T),
+ SX(A, S8X24_UINT, R, Z24S8, T),
+ SX(A, X32_S8X24_UINT, G, ZF32_X24S8, T),
+
+ F3(A, B5G6R5_UNORM, B5G6R5_UNORM, B, G, R, xx, UNORM, B5G6R5, TD),
+ C4(A, B5G5R5A1_UNORM, BGR5_A1_UNORM, B, G, R, A, UNORM, A1B5G5R5, TD),
+ F3(A, B5G5R5X1_UNORM, BGR5_X1_UNORM, B, G, R, xx, UNORM, A1B5G5R5, TD),
+ C4(A, B4G4R4A4_UNORM, NONE, B, G, R, A, UNORM, A4B4G4R4, T),
+ F3(A, B4G4R4X4_UNORM, NONE, B, G, R, xx, UNORM, A4B4G4R4, T),
+ F3(A, R9G9B9E5_FLOAT, NONE, R, G, B, xx, FLOAT, E5B9G9R9_SHAREDEXP, T),
+
+ C4(A, R10G10B10A2_UNORM, RGB10_A2_UNORM, R, G, B, A, UNORM, A2B10G10R10, IB),
+ C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, TD),
+ C4(A, R10G10B10A2_SNORM, NONE, R, G, B, A, SNORM, A2B10G10R10, T),
+ C4(A, B10G10R10A2_SNORM, NONE, B, G, R, A, SNORM, A2B10G10R10, T),
+ C4(A, R10G10B10A2_UINT, RGB10_A2_UINT, R, G, B, A, UINT, A2B10G10R10, TR),
+ C4(A, B10G10R10A2_UINT, RGB10_A2_UINT, B, G, R, A, UINT, A2B10G10R10, T),
+
+ F3(A, R11G11B10_FLOAT, R11G11B10_FLOAT, R, G, B, xx, FLOAT, BF10GF11RF11, IB),
+
+ F3(A, L8_UNORM, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
+ F3(A, L8_SRGB, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
+ F3(A, L8_SNORM, R8_SNORM, R, R, R, xx, SNORM, R8, TC),
+ I3(A, L8_SINT, R8_SINT, R, R, R, xx, SINT, R8, TR),
+ I3(A, L8_UINT, R8_UINT, R, R, R, xx, UINT, R8, TR),
+ F3(A, L16_UNORM, R16_UNORM, R, R, R, xx, UNORM, R16, TC),
+ F3(A, L16_SNORM, R16_SNORM, R, R, R, xx, SNORM, R16, TC),
+ F3(A, L16_FLOAT, R16_FLOAT, R, R, R, xx, FLOAT, R16, TB),
+ I3(A, L16_SINT, R16_SINT, R, R, R, xx, SINT, R16, TR),
+ I3(A, L16_UINT, R16_UINT, R, R, R, xx, UINT, R16, TR),
+ F3(A, L32_FLOAT, R32_FLOAT, R, R, R, xx, FLOAT, R32, TB),
+ I3(A, L32_SINT, R32_SINT, R, R, R, xx, SINT, R32, TR),
+ I3(A, L32_UINT, R32_UINT, R, R, R, xx, UINT, R32, TR),
+
+ C4(A, I8_UNORM, R8_UNORM, R, R, R, R, UNORM, R8, TR),
+ C4(A, I8_SNORM, R8_SNORM, R, R, R, R, SNORM, R8, TR),
+ C4(A, I8_SINT, R8_SINT, R, R, R, R, SINT, R8, TR),
+ C4(A, I8_UINT, R8_UINT, R, R, R, R, UINT, R8, TR),
+ C4(A, I16_UNORM, R16_UNORM, R, R, R, R, UNORM, R16, TR),
+ C4(A, I16_SNORM, R16_SNORM, R, R, R, R, SNORM, R16, TR),
+ C4(A, I16_FLOAT, R16_FLOAT, R, R, R, R, FLOAT, R16, TR),
+ C4(A, I16_SINT, R16_SINT, R, R, R, R, SINT, R16, TR),
+ C4(A, I16_UINT, R16_UINT, R, R, R, R, UINT, R16, TR),
+ C4(A, I32_FLOAT, R32_FLOAT, R, R, R, R, FLOAT, R32, TR),
+ C4(A, I32_SINT, R32_SINT, R, R, R, R, SINT, R32, TR),
+ C4(A, I32_UINT, R32_UINT, R, R, R, R, UINT, R32, TR),
+
+ A1(A, A8_UNORM, A8_UNORM, xx, xx, xx, R, UNORM, R8, TB),
+ A1(A, A8_SNORM, R8_SNORM, xx, xx, xx, R, SNORM, R8, T),
+ A1(A, A8_SINT, R8_SINT, xx, xx, xx, R, SINT, R8, T),
+ A1(A, A8_UINT, R8_UINT, xx, xx, xx, R, UINT, R8, T),
+ A1(A, A16_UNORM, R16_UNORM, xx, xx, xx, R, UNORM, R16, T),
+ A1(A, A16_SNORM, R16_SNORM, xx, xx, xx, R, SNORM, R16, T),
+ A1(A, A16_FLOAT, R16_FLOAT, xx, xx, xx, R, FLOAT, R16, T),
+ A1(A, A16_SINT, R16_SINT, xx, xx, xx, R, SINT, R16, T),
+ A1(A, A16_UINT, R16_UINT, xx, xx, xx, R, UINT, R16, T),
+ A1(A, A32_FLOAT, R32_FLOAT, xx, xx, xx, R, FLOAT, R32, T),
+ A1(A, A32_SINT, R32_SINT, xx, xx, xx, R, SINT, R32, T),
+ A1(A, A32_UINT, R32_UINT, xx, xx, xx, R, UINT, R32, T),
+
+ C4(A, L4A4_UNORM, NONE, R, R, R, G, UNORM, G4R4, T),
+ C4(A, L8A8_UNORM, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
+ C4(A, L8A8_SNORM, RG8_SNORM, R, R, R, G, SNORM, G8R8, T),
+ C4(A, L8A8_SRGB, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
+ C4(A, L8A8_SINT, RG8_SINT, R, R, R, G, SINT, G8R8, T),
+ C4(A, L8A8_UINT, RG8_UINT, R, R, R, G, UINT, G8R8, T),
+ C4(A, L16A16_UNORM, RG16_UNORM, R, R, R, G, UNORM, R16_G16, T),
+ C4(A, L16A16_SNORM, RG16_SNORM, R, R, R, G, SNORM, R16_G16, T),
+ C4(A, L16A16_FLOAT, RG16_FLOAT, R, R, R, G, FLOAT, R16_G16, T),
+ C4(A, L16A16_SINT, RG16_SINT, R, R, R, G, SINT, R16_G16, T),
+ C4(A, L16A16_UINT, RG16_UINT, R, R, R, G, UINT, R16_G16, T),
+ C4(A, L32A32_FLOAT, RG32_FLOAT, R, R, R, G, FLOAT, R32_G32, T),
+ C4(A, L32A32_SINT, RG32_SINT, R, R, R, G, SINT, R32_G32, T),
+ C4(A, L32A32_UINT, RG32_UINT, R, R, R, G, UINT, R32_G32, T),
+
+ F3(A, DXT1_RGB, NONE, R, G, B, xx, UNORM, DXT1, T),
+ F3(A, DXT1_SRGB, NONE, R, G, B, xx, UNORM, DXT1, T),
+ C4(A, DXT1_RGBA, NONE, R, G, B, A, UNORM, DXT1, T),
+ C4(A, DXT1_SRGBA, NONE, R, G, B, A, UNORM, DXT1, T),
+ C4(A, DXT3_RGBA, NONE, R, G, B, A, UNORM, DXT23, T),
+ C4(A, DXT3_SRGBA, NONE, R, G, B, A, UNORM, DXT23, T),
+ C4(A, DXT5_RGBA, NONE, R, G, B, A, UNORM, DXT45, T),
+ C4(A, DXT5_SRGBA, NONE, R, G, B, A, UNORM, DXT45, T),
+
+ F1(A, RGTC1_UNORM, NONE, R, xx, xx, xx, UNORM, DXN1, T),
+ F1(A, RGTC1_SNORM, NONE, R, xx, xx, xx, SNORM, DXN1, T),
+ F2(A, RGTC2_UNORM, NONE, R, G, xx, xx, UNORM, DXN2, T),
+ F2(A, RGTC2_SNORM, NONE, R, G, xx, xx, SNORM, DXN2, T),
+ F3(A, LATC1_UNORM, NONE, R, R, R, xx, UNORM, DXN1, T),
+ F3(A, LATC1_SNORM, NONE, R, R, R, xx, SNORM, DXN1, T),
+ C4(A, LATC2_UNORM, NONE, R, R, R, G, UNORM, DXN2, T),
+ C4(A, LATC2_SNORM, NONE, R, R, R, G, SNORM, DXN2, T),
+
+ C4(C, BPTC_RGBA_UNORM, NONE, R, G, B, A, UNORM, BC7U, t),
+ C4(C, BPTC_SRGBA, NONE, R, G, B, A, UNORM, BC7U, t),
+ F3(C, BPTC_RGB_FLOAT, NONE, R, G, B, xx, FLOAT, BC6H_SF16, t),
+ F3(C, BPTC_RGB_UFLOAT, NONE, R, G, B, xx, FLOAT, BC6H_UF16, t),
+
+ C4(A, R32G32B32A32_FLOAT, RGBA32_FLOAT, R, G, B, A, FLOAT, R32_G32_B32_A32, IB),
+ C4(A, R32G32B32A32_UNORM, NONE, R, G, B, A, UNORM, R32_G32_B32_A32, T),
+ C4(A, R32G32B32A32_SNORM, NONE, R, G, B, A, SNORM, R32_G32_B32_A32, T),
+ C4(A, R32G32B32A32_SINT, RGBA32_SINT, R, G, B, A, SINT, R32_G32_B32_A32, IR),
+ C4(A, R32G32B32A32_UINT, RGBA32_UINT, R, G, B, A, UINT, R32_G32_B32_A32, IR),
+ F3(A, R32G32B32X32_FLOAT, RGBX32_FLOAT, R, G, B, xx, FLOAT, R32_G32_B32_A32, TB),
+ I3(A, R32G32B32X32_SINT, RGBX32_SINT, R, G, B, xx, SINT, R32_G32_B32_A32, TR),
+ I3(A, R32G32B32X32_UINT, RGBX32_UINT, R, G, B, xx, UINT, R32_G32_B32_A32, TR),
+
+ F3(C, R32G32B32_FLOAT, NONE, R, G, B, xx, FLOAT, R32_G32_B32, t),
+ I3(C, R32G32B32_SINT, NONE, R, G, B, xx, SINT, R32_G32_B32, t),
+ I3(C, R32G32B32_UINT, NONE, R, G, B, xx, UINT, R32_G32_B32, t),
+
+ F2(A, R32G32_FLOAT, RG32_FLOAT, R, G, xx, xx, FLOAT, R32_G32, IB),
+ F2(A, R32G32_UNORM, NONE, R, G, xx, xx, UNORM, R32_G32, T),
+ F2(A, R32G32_SNORM, NONE, R, G, xx, xx, SNORM, R32_G32, T),
+ I2(A, R32G32_SINT, RG32_SINT, R, G, xx, xx, SINT, R32_G32, IR),
+ I2(A, R32G32_UINT, RG32_UINT, R, G, xx, xx, UINT, R32_G32, IR),
+
+ F1(A, R32_FLOAT, R32_FLOAT, R, xx, xx, xx, FLOAT, R32, IB),
+ F1(A, R32_UNORM, NONE, R, xx, xx, xx, UNORM, R32, T),
+ F1(A, R32_SNORM, NONE, R, xx, xx, xx, SNORM, R32, T),
+ I1(A, R32_SINT, R32_SINT, R, xx, xx, xx, SINT, R32, IR),
+ I1(A, R32_UINT, R32_UINT, R, xx, xx, xx, UINT, R32, IR),
+
+ C4(A, R16G16B16A16_FLOAT, RGBA16_FLOAT, R, G, B, A, FLOAT, R16_G16_B16_A16, IB),
+ C4(A, R16G16B16A16_UNORM, RGBA16_UNORM, R, G, B, A, UNORM, R16_G16_B16_A16, IC),
+ C4(A, R16G16B16A16_SNORM, RGBA16_SNORM, R, G, B, A, SNORM, R16_G16_B16_A16, IC),
+ C4(A, R16G16B16A16_SINT, RGBA16_SINT, R, G, B, A, SINT, R16_G16_B16_A16, IR),
+ C4(A, R16G16B16A16_UINT, RGBA16_UINT, R, G, B, A, UINT, R16_G16_B16_A16, IR),
+ F3(A, R16G16B16X16_FLOAT, RGBX16_FLOAT, R, G, B, xx, FLOAT, R16_G16_B16_A16, TB),
+ F3(A, R16G16B16X16_UNORM, RGBA16_UNORM, R, G, B, xx, UNORM, R16_G16_B16_A16, T),
+ F3(A, R16G16B16X16_SNORM, RGBA16_SNORM, R, G, B, xx, SNORM, R16_G16_B16_A16, T),
+ I3(A, R16G16B16X16_SINT, RGBA16_SINT, R, G, B, xx, SINT, R16_G16_B16_A16, T),
+ I3(A, R16G16B16X16_UINT, RGBA16_UINT, R, G, B, xx, UINT, R16_G16_B16_A16, T),
+
+ F2(A, R16G16_FLOAT, RG16_FLOAT, R, G, xx, xx, FLOAT, R16_G16, IB),
+ F2(A, R16G16_UNORM, RG16_UNORM, R, G, xx, xx, UNORM, R16_G16, IC),
+ F2(A, R16G16_SNORM, RG16_SNORM, R, G, xx, xx, SNORM, R16_G16, IC),
+ I2(A, R16G16_SINT, RG16_SINT, R, G, xx, xx, SINT, R16_G16, IR),
+ I2(A, R16G16_UINT, RG16_UINT, R, G, xx, xx, UINT, R16_G16, IR),
+
+ F1(A, R16_FLOAT, R16_FLOAT, R, xx, xx, xx, FLOAT, R16, IB),
+ F1(A, R16_UNORM, R16_UNORM, R, xx, xx, xx, UNORM, R16, IC),
+ F1(A, R16_SNORM, R16_SNORM, R, xx, xx, xx, SNORM, R16, IC),
+ I1(A, R16_SINT, R16_SINT, R, xx, xx, xx, SINT, R16, IR),
+ I1(A, R16_UINT, R16_UINT, R, xx, xx, xx, UINT, R16, IR),
+
+ C4(A, R8G8B8A8_SNORM, RGBA8_SNORM, R, G, B, A, SNORM, A8B8G8R8, IC),
+ C4(A, R8G8B8A8_SINT, RGBA8_SINT, R, G, B, A, SINT, A8B8G8R8, IR),
+ C4(A, R8G8B8A8_UINT, RGBA8_UINT, R, G, B, A, UINT, A8B8G8R8, IR),
+ F3(A, R8G8B8X8_SNORM, RGBA8_SNORM, R, G, B, xx, SNORM, A8B8G8R8, T),
+ I3(A, R8G8B8X8_SINT, RGBA8_SINT, R, G, B, xx, SINT, A8B8G8R8, T),
+ I3(A, R8G8B8X8_UINT, RGBA8_UINT, R, G, B, xx, UINT, A8B8G8R8, T),
+
+ F2(A, R8G8_UNORM, RG8_UNORM, R, G, xx, xx, UNORM, G8R8, IB),
+ F2(A, R8G8_SNORM, RG8_SNORM, R, G, xx, xx, SNORM, G8R8, IC),
+ I2(A, R8G8_SINT, RG8_SINT, R, G, xx, xx, SINT, G8R8, IR),
+ I2(A, R8G8_UINT, RG8_UINT, R, G, xx, xx, UINT, G8R8, IR),
+
+ F1(A, R8_UNORM, R8_UNORM, R, xx, xx, xx, UNORM, R8, IB),
+ F1(A, R8_SNORM, R8_SNORM, R, xx, xx, xx, SNORM, R8, IC),
+ I1(A, R8_SINT, R8_SINT, R, xx, xx, xx, SINT, R8, IR),
+ I1(A, R8_UINT, R8_UINT, R, xx, xx, xx, UINT, R8, IR),
+
+ F3(A, R8G8_B8G8_UNORM, NONE, R, G, B, xx, UNORM, G8B8G8R8, T),
+ F3(A, G8R8_B8R8_UNORM, NONE, G, R, B, xx, UNORM, G8B8G8R8, T),
+ F3(A, G8R8_G8B8_UNORM, NONE, R, G, B, xx, UNORM, B8G8R8G8, T),
+ F3(A, R8G8_R8B8_UNORM, NONE, G, R, B, xx, UNORM, B8G8R8G8, T),
+
+ F1(A, R1_UNORM, BITMAP, R, xx, xx, xx, UNORM, R1, T),
+
+ C4(A, R4A4_UNORM, NONE, R, ZERO, ZERO, G, UNORM, G4R4, T),
+ C4(A, R8A8_UNORM, NONE, R, ZERO, ZERO, G, UNORM, G8R8, T),
+ C4(A, A4R4_UNORM, NONE, G, ZERO, ZERO, R, UNORM, G4R4, T),
+ C4(A, A8R8_UNORM, NONE, G, ZERO, ZERO, R, UNORM, G8R8, T),
+
+ SF(A, R8SG8SB8UX8U_NORM, 0, R, G, B, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, A8B8G8R8, T),
+ SF(A, R5SG5SB6U_NORM, 0, R, G, B, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, B6G5R5, T),
+};
#if NOUVEAU_DRIVER == 0xc0
# define NVXX_3D_VAF_SIZE(s) NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##s
@@ -92,353 +325,138 @@
# define NVXX_3D_VAF_TYPE(t) NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_##t
#endif
-#define TBLENT_A_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u, br) \
- [PIPE_FORMAT_##pf] = { \
- sf, \
- (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \
- (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \
- (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \
- (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \
- (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
- (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
- (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
- (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
- NV50_TIC_0_FMT_##sz, \
- NVXX_3D_VAF_SIZE(sz) | \
- NVXX_3D_VAF_TYPE(t0) | (br << 31), \
- U_##u \
- }
-
-#define TBLENT_B_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \
+#define VF_x(pf, type, size, bgra) 0
+#define VF_A(pf, type, size, bgra) \
+ NVXX_3D_VAF_SIZE(size) | NVXX_3D_VAF_TYPE(type) | (bgra << 31)
+#define VF(c, pf, type, size, bgra) \
[PIPE_FORMAT_##pf] = { \
- sf, \
- (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \
- (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \
- (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \
- (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \
- (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
- (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
- (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
- (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
- NV50_TIC_0_FMT_##sz, 0, U_##u \
+ VF_##c(pf, type, size, bgra), \
+ PIPE_BIND_VERTEX_BUFFER \
}
-#define C4A(p, n, r, g, b, a, t, s, u, br) \
- TBLENT_A_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u, br)
-#define C4B(p, n, r, g, b, a, t, s, u) \
- TBLENT_B_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u)
-
-#define ZXB(p, n, r, g, b, a, t, s, u) \
- TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
- r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
-#define ZSB(p, n, r, g, b, a, t, s, u) \
- TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
- r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
-#define SZB(p, n, r, g, b, a, t, s, u) \
- TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
- r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u)
-#define SXB(p, r, s, u) \
- TBLENT_B_(p, NV50_ZETA_FORMAT_NONE, \
- r, r, r, r, UINT, UINT, UINT, UINT, s, u)
-
-#define F3A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, g, b, ONE_FLOAT, t, s, u, 0)
-#define I3A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, g, b, ONE_INT, t, s, u, 0)
-#define F3B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, g, b, ONE_FLOAT, t, s, u)
-#define I3B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, g, b, ONE_INT, t, s, u)
-
-#define F2A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, g, ZERO, ONE_FLOAT, t, s, u, 0)
-#define I2A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, g, ZERO, ONE_INT, t, s, u, 0)
-#define F2B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, g, ZERO, ONE_FLOAT, t, s, u)
-#define I2B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, g, ZERO, ONE_INT, t, s, u)
-
-#define F1A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u, 0)
-#define I1A(p, n, r, g, b, a, t, s, u) \
- C4A(p, n, r, ZERO, ZERO, ONE_INT, t, s, u, 0)
-#define F1B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u)
-#define I1B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, r, ZERO, ZERO, ONE_INT, t, s, u)
-
-#define A1B(p, n, r, g, b, a, t, s, u) \
- C4B(p, n, ZERO, ZERO, ZERO, a, t, s, u)
-
#if NOUVEAU_DRIVER == 0xc0
-const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
+const struct nvc0_vertex_format nvc0_vertex_format[PIPE_FORMAT_COUNT] =
#else
-const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
+const struct nv50_vertex_format nv50_vertex_format[PIPE_FORMAT_COUNT] =
#endif
{
- C4A(B8G8R8A8_UNORM, BGRA8_UNORM, C2, C1, C0, C3, UNORM, 8_8_8_8, TDV, 1),
- F3A(B8G8R8X8_UNORM, BGRX8_UNORM, C2, C1, C0, xx, UNORM, 8_8_8_8, TD),
- C4A(B8G8R8A8_SRGB, BGRA8_SRGB, C2, C1, C0, C3, UNORM, 8_8_8_8, TD, 1),
- F3A(B8G8R8X8_SRGB, BGRX8_SRGB, C2, C1, C0, xx, UNORM, 8_8_8_8, TD),
- C4A(R8G8B8A8_UNORM, RGBA8_UNORM, C0, C1, C2, C3, UNORM, 8_8_8_8, IBV, 0),
- F3A(R8G8B8X8_UNORM, RGBX8_UNORM, C0, C1, C2, xx, UNORM, 8_8_8_8, TB),
- C4A(R8G8B8A8_SRGB, RGBA8_SRGB, C0, C1, C2, C3, UNORM, 8_8_8_8, TB, 0),
- F3B(R8G8B8X8_SRGB, RGBX8_SRGB, C0, C1, C2, xx, UNORM, 8_8_8_8, TB),
-
- ZXB(Z16_UNORM, Z16_UNORM, C0, C0, C0, xx, UNORM, Z16, TZ),
- ZXB(Z32_FLOAT, Z32_FLOAT, C0, C0, C0, xx, FLOAT, Z32, TZ),
- ZXB(Z24X8_UNORM, Z24_X8_UNORM, C0, C0, C0, xx, UNORM, Z24_X8, TZ),
- SZB(X8Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ),
- ZSB(Z24_UNORM_S8_UINT, Z24_S8_UNORM, C0, C0, C0, xx, UNORM, Z24_S8, TZ),
- SZB(S8_UINT_Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ),
- ZSB(Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, C0, C0, C0, xx, FLOAT,
- Z32_S8_X24, TZ),
-
- SXB(S8_UINT, C0, 8, T),
- SXB(X24S8_UINT, C1, Z24_S8, T),
- SXB(S8X24_UINT, C0, S8_Z24, T),
- SXB(X32_S8X24_UINT, C1, Z32_S8_X24, T),
-
- F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
- C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
- F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
- C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
- F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
- F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
-
- C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
- IBV, 0),
- C4A(B10G10R10A2_UNORM, BGR10_A2_UNORM, C2, C1, C0, C3, UNORM, 10_10_10_2,
- TDV, 1),
- C4A(R10G10B10A2_SNORM, NONE, C0, C1, C2, C3, SNORM, 10_10_10_2, TV, 0),
- C4A(B10G10R10A2_SNORM, NONE, C2, C1, C0, C3, SNORM, 10_10_10_2, TV, 1),
- C4A(R10G10B10A2_UINT, RGB10_A2_UINT, C0, C1, C2, C3, UINT, 10_10_10_2, TRV, 0),
- C4A(B10G10R10A2_UINT, RGB10_A2_UINT, C2, C1, C0, C3, UINT, 10_10_10_2, TV, 0),
-
- F3A(R11G11B10_FLOAT, R11G11B10_FLOAT, C0, C1, C2, xx, FLOAT, 11_11_10, IBV),
-
- F3B(L8_UNORM, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB),
- F3B(L8_SRGB, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB),
- F3B(L8_SNORM, R8_SNORM, C0, C0, C0, xx, SNORM, 8, TC),
- I3B(L8_SINT, R8_SINT, C0, C0, C0, xx, SINT, 8, TR),
- I3B(L8_UINT, R8_UINT, C0, C0, C0, xx, UINT, 8, TR),
- F3B(L16_UNORM, R16_UNORM, C0, C0, C0, xx, UNORM, 16, TC),
- F3B(L16_SNORM, R16_SNORM, C0, C0, C0, xx, SNORM, 16, TC),
- F3B(L16_FLOAT, R16_FLOAT, C0, C0, C0, xx, FLOAT, 16, TB),
- I3B(L16_SINT, R16_SINT, C0, C0, C0, xx, SINT, 16, TR),
- I3B(L16_UINT, R16_UINT, C0, C0, C0, xx, UINT, 16, TR),
- F3B(L32_FLOAT, R32_FLOAT, C0, C0, C0, xx, FLOAT, 32, TB),
- I3B(L32_SINT, R32_SINT, C0, C0, C0, xx, SINT, 32, TR),
- I3B(L32_UINT, R32_UINT, C0, C0, C0, xx, UINT, 32, TR),
-
- C4B(I8_UNORM, R8_UNORM, C0, C0, C0, C0, UNORM, 8, TR),
- C4B(I8_SNORM, R8_SNORM, C0, C0, C0, C0, SNORM, 8, TR),
- C4B(I8_SINT, R8_SINT, C0, C0, C0, C0, SINT, 8, TR),
- C4B(I8_UINT, R8_UINT, C0, C0, C0, C0, UINT, 8, TR),
- C4B(I16_UNORM, R16_UNORM, C0, C0, C0, C0, UNORM, 16, TR),
- C4B(I16_SNORM, R16_SNORM, C0, C0, C0, C0, SNORM, 16, TR),
- C4B(I16_FLOAT, R16_FLOAT, C0, C0, C0, C0, FLOAT, 16, TR),
- C4B(I16_SINT, R16_SINT, C0, C0, C0, C0, SINT, 16, TR),
- C4B(I16_UINT, R16_UINT, C0, C0, C0, C0, UINT, 16, TR),
- C4B(I32_FLOAT, R32_FLOAT, C0, C0, C0, C0, FLOAT, 32, TR),
- C4B(I32_SINT, R32_SINT, C0, C0, C0, C0, SINT, 32, TR),
- C4B(I32_UINT, R32_UINT, C0, C0, C0, C0, UINT, 32, TR),
-
- A1B(A8_UNORM, A8_UNORM, xx, xx, xx, C0, UNORM, 8, TB),
- A1B(A8_SNORM, R8_SNORM, xx, xx, xx, C0, SNORM, 8, T),
- A1B(A8_SINT, R8_SINT, xx, xx, xx, C0, SINT, 8, T),
- A1B(A8_UINT, R8_UINT, xx, xx, xx, C0, UINT, 8, T),
- A1B(A16_UNORM, R16_UNORM, xx, xx, xx, C0, UNORM, 16, T),
- A1B(A16_SNORM, R16_SNORM, xx, xx, xx, C0, SNORM, 16, T),
- A1B(A16_FLOAT, R16_FLOAT, xx, xx, xx, C0, FLOAT, 16, T),
- A1B(A16_SINT, R16_SINT, xx, xx, xx, C0, SINT, 16, T),
- A1B(A16_UINT, R16_UINT, xx, xx, xx, C0, UINT, 16, T),
- A1B(A32_FLOAT, R32_FLOAT, xx, xx, xx, C0, FLOAT, 32, T),
- A1B(A32_SINT, R32_SINT, xx, xx, xx, C0, SINT, 32, T),
- A1B(A32_UINT, R32_UINT, xx, xx, xx, C0, UINT, 32, T),
-
- C4B(L4A4_UNORM, NONE, C0, C0, C0, C1, UNORM, 4_4, T),
- C4B(L8A8_UNORM, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T),
- C4B(L8A8_SNORM, RG8_SNORM, C0, C0, C0, C1, SNORM, 8_8, T),
- C4B(L8A8_SRGB, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T),
- C4B(L8A8_SINT, RG8_SINT, C0, C0, C0, C1, SINT, 8_8, T),
- C4B(L8A8_UINT, RG8_UINT, C0, C0, C0, C1, UINT, 8_8, T),
- C4B(L16A16_UNORM, RG16_UNORM, C0, C0, C0, C1, UNORM, 16_16, T),
- C4B(L16A16_SNORM, RG16_SNORM, C0, C0, C0, C1, SNORM, 16_16, T),
- C4B(L16A16_FLOAT, RG16_FLOAT, C0, C0, C0, C1, FLOAT, 16_16, T),
- C4B(L16A16_SINT, RG16_SINT, C0, C0, C0, C1, SINT, 16_16, T),
- C4B(L16A16_UINT, RG16_UINT, C0, C0, C0, C1, UINT, 16_16, T),
- C4B(L32A32_FLOAT, RG32_FLOAT, C0, C0, C0, C1, FLOAT, 32_32, T),
- C4B(L32A32_SINT, RG32_SINT, C0, C0, C0, C1, SINT, 32_32, T),
- C4B(L32A32_UINT, RG32_UINT, C0, C0, C0, C1, UINT, 32_32, T),
-
- F3B(DXT1_RGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T),
- F3B(DXT1_SRGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T),
- C4B(DXT1_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T),
- C4B(DXT1_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T),
- C4B(DXT3_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T),
- C4B(DXT3_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T),
- C4B(DXT5_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T),
- C4B(DXT5_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T),
-
- F1B(RGTC1_UNORM, NONE, C0, xx, xx, xx, UNORM, RGTC1, T),
- F1B(RGTC1_SNORM, NONE, C0, xx, xx, xx, SNORM, RGTC1, T),
- F2B(RGTC2_UNORM, NONE, C0, C1, xx, xx, UNORM, RGTC2, T),
- F2B(RGTC2_SNORM, NONE, C0, C1, xx, xx, SNORM, RGTC2, T),
- F3B(LATC1_UNORM, NONE, C0, C0, C0, xx, UNORM, RGTC1, T),
- F3B(LATC1_SNORM, NONE, C0, C0, C0, xx, SNORM, RGTC1, T),
- C4B(LATC2_UNORM, NONE, C0, C0, C0, C1, UNORM, RGTC2, T),
- C4B(LATC2_SNORM, NONE, C0, C0, C0, C1, SNORM, RGTC2, T),
-
- C4B(BPTC_RGBA_UNORM, NONE, C0, C1, C2, C3, UNORM, BPTC, t),
- C4B(BPTC_SRGBA, NONE, C0, C1, C2, C3, UNORM, BPTC, t),
- F3B(BPTC_RGB_FLOAT, NONE, C0, C1, C2, xx, FLOAT, BPTC_FLOAT, t),
- F3B(BPTC_RGB_UFLOAT, NONE, C0, C1, C2, xx, FLOAT, BPTC_UFLOAT, t),
-
- C4A(R32G32B32A32_FLOAT, RGBA32_FLOAT, C0, C1, C2, C3, FLOAT, 32_32_32_32,
- IBV, 0),
- C4A(R32G32B32A32_UNORM, NONE, C0, C1, C2, C3, UNORM, 32_32_32_32, TV, 0),
- C4A(R32G32B32A32_SNORM, NONE, C0, C1, C2, C3, SNORM, 32_32_32_32, TV, 0),
- C4A(R32G32B32A32_SINT, RGBA32_SINT, C0, C1, C2, C3, SINT, 32_32_32_32,
- IRV, 0),
- C4A(R32G32B32A32_UINT, RGBA32_UINT, C0, C1, C2, C3, UINT, 32_32_32_32,
- IRV, 0),
- F3B(R32G32B32X32_FLOAT, RGBX32_FLOAT, C0, C1, C2, xx, FLOAT, 32_32_32_32, TB),
- I3B(R32G32B32X32_SINT, RGBX32_SINT, C0, C1, C2, xx, SINT, 32_32_32_32, TR),
- I3B(R32G32B32X32_UINT, RGBX32_UINT, C0, C1, C2, xx, UINT, 32_32_32_32, TR),
-
- F2A(R32G32_FLOAT, RG32_FLOAT, C0, C1, xx, xx, FLOAT, 32_32, IBV),
- F2A(R32G32_UNORM, NONE, C0, C1, xx, xx, UNORM, 32_32, TV),
- F2A(R32G32_SNORM, NONE, C0, C1, xx, xx, SNORM, 32_32, TV),
- I2A(R32G32_SINT, RG32_SINT, C0, C1, xx, xx, SINT, 32_32, IRV),
- I2A(R32G32_UINT, RG32_UINT, C0, C1, xx, xx, UINT, 32_32, IRV),
-
- F1A(R32_FLOAT, R32_FLOAT, C0, xx, xx, xx, FLOAT, 32, IBV),
- F1A(R32_UNORM, NONE, C0, xx, xx, xx, UNORM, 32, TV),
- F1A(R32_SNORM, NONE, C0, xx, xx, xx, SNORM, 32, TV),
- I1A(R32_SINT, R32_SINT, C0, xx, xx, xx, SINT, 32, IRV),
- I1A(R32_UINT, R32_UINT, C0, xx, xx, xx, UINT, 32, IRV),
-
- C4A(R16G16B16A16_FLOAT, RGBA16_FLOAT, C0, C1, C2, C3, FLOAT, 16_16_16_16,
- IBV, 0),
- C4A(R16G16B16A16_UNORM, RGBA16_UNORM, C0, C1, C2, C3, UNORM, 16_16_16_16,
- ICV, 0),
- C4A(R16G16B16A16_SNORM, RGBA16_SNORM, C0, C1, C2, C3, SNORM, 16_16_16_16,
- ICV, 0),
- C4A(R16G16B16A16_SINT, RGBA16_SINT, C0, C1, C2, C3, SINT, 16_16_16_16,
- IRV, 0),
- C4A(R16G16B16A16_UINT, RGBA16_UINT, C0, C1, C2, C3, UINT, 16_16_16_16,
- IRV, 0),
- F3B(R16G16B16X16_FLOAT, RGBX16_FLOAT, C0, C1, C2, xx, FLOAT, 16_16_16_16, TB),
- F3B(R16G16B16X16_UNORM, RGBA16_UNORM, C0, C1, C2, xx, UNORM, 16_16_16_16, T),
- F3B(R16G16B16X16_SNORM, RGBA16_SNORM, C0, C1, C2, xx, SNORM, 16_16_16_16, T),
- I3B(R16G16B16X16_SINT, RGBA16_SINT, C0, C1, C2, xx, SINT, 16_16_16_16, T),
- I3B(R16G16B16X16_UINT, RGBA16_UINT, C0, C1, C2, xx, UINT, 16_16_16_16, T),
-
- F2A(R16G16_FLOAT, RG16_FLOAT, C0, C1, xx, xx, FLOAT, 16_16, IBV),
- F2A(R16G16_UNORM, RG16_UNORM, C0, C1, xx, xx, UNORM, 16_16, ICV),
- F2A(R16G16_SNORM, RG16_SNORM, C0, C1, xx, xx, SNORM, 16_16, ICV),
- I2A(R16G16_SINT, RG16_SINT, C0, C1, xx, xx, SINT, 16_16, IRV),
- I2A(R16G16_UINT, RG16_UINT, C0, C1, xx, xx, UINT, 16_16, IRV),
-
- F1A(R16_FLOAT, R16_FLOAT, C0, xx, xx, xx, FLOAT, 16, IBV),
- F1A(R16_UNORM, R16_UNORM, C0, xx, xx, xx, UNORM, 16, ICV),
- F1A(R16_SNORM, R16_SNORM, C0, xx, xx, xx, SNORM, 16, ICV),
- I1A(R16_SINT, R16_SINT, C0, xx, xx, xx, SINT, 16, IRV),
- I1A(R16_UINT, R16_UINT, C0, xx, xx, xx, UINT, 16, IRV),
-
- C4A(R8G8B8A8_SNORM, RGBA8_SNORM, C0, C1, C2, C3, SNORM, 8_8_8_8, ICV, 0),
- C4A(R8G8B8A8_SINT, RGBA8_SINT, C0, C1, C2, C3, SINT, 8_8_8_8, IRV, 0),
- C4A(R8G8B8A8_UINT, RGBA8_UINT, C0, C1, C2, C3, UINT, 8_8_8_8, IRV, 0),
- F3B(R8G8B8X8_SNORM, RGBA8_SNORM, C0, C1, C2, xx, SNORM, 8_8_8_8, T),
- I3B(R8G8B8X8_SINT, RGBA8_SINT, C0, C1, C2, xx, SINT, 8_8_8_8, T),
- I3B(R8G8B8X8_UINT, RGBA8_UINT, C0, C1, C2, xx, UINT, 8_8_8_8, T),
-
- F2A(R8G8_UNORM, RG8_UNORM, C0, C1, xx, xx, UNORM, 8_8, IBV),
- F2A(R8G8_SNORM, RG8_SNORM, C0, C1, xx, xx, SNORM, 8_8, ICV),
- I2A(R8G8_SINT, RG8_SINT, C0, C1, xx, xx, SINT, 8_8, IRV),
- I2A(R8G8_UINT, RG8_UINT, C0, C1, xx, xx, UINT, 8_8, IRV),
-
- F1A(R8_UNORM, R8_UNORM, C0, xx, xx, xx, UNORM, 8, IBV),
- F1A(R8_SNORM, R8_SNORM, C0, xx, xx, xx, SNORM, 8, ICV),
- I1A(R8_SINT, R8_SINT, C0, xx, xx, xx, SINT, 8, IRV),
- I1A(R8_UINT, R8_UINT, C0, xx, xx, xx, UINT, 8, IRV),
-
- F3B(R8G8_B8G8_UNORM, NONE, C0, C1, C2, xx, UNORM, U8_YA8_V8_YB8, T),
- F3B(G8R8_B8R8_UNORM, NONE, C1, C0, C2, xx, UNORM, U8_YA8_V8_YB8, T),
- F3B(G8R8_G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, YA8_U8_YB8_V8, T),
- F3B(R8G8_R8B8_UNORM, NONE, C1, C0, C2, xx, UNORM, YA8_U8_YB8_V8, T),
-
- F1B(R1_UNORM, BITMAP, C0, xx, xx, xx, UNORM, BITMAP, T),
-
- C4B(R4A4_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 4_4, T),
- C4B(R8A8_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 8_8, T),
- C4B(A4R4_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 4_4, T),
- C4B(A8R8_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 8_8, T),
-
- TBLENT_B_(R8SG8SB8UX8U_NORM, 0,
- C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, T),
- TBLENT_B_(R5SG5SB6U_NORM, 0,
- C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 5_5_6, T),
-
- /* vertex-only formats: */
-
- C4A(R32G32B32A32_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 32_32_32_32, V, 0),
- C4A(R32G32B32A32_USCALED, NONE, C0, C1, C2, C3, USCALED, 32_32_32_32, V, 0),
- F3A(R32G32B32_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, tV),
- F3A(R32G32B32_UNORM, NONE, C0, C1, C2, xx, UNORM, 32_32_32, V),
- F3A(R32G32B32_SNORM, NONE, C0, C1, C2, xx, SNORM, 32_32_32, V),
- I3A(R32G32B32_SINT, NONE, C0, C1, C2, xx, SINT, 32_32_32, tV),
- I3A(R32G32B32_UINT, NONE, C0, C1, C2, xx, UINT, 32_32_32, tV),
- F3A(R32G32B32_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 32_32_32, V),
- F3A(R32G32B32_USCALED, NONE, C0, C1, C2, xx, USCALED, 32_32_32, V),
- F2A(R32G32_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 32_32, V),
- F2A(R32G32_USCALED, NONE, C0, C1, xx, xx, USCALED, 32_32, V),
- F1A(R32_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 32, V),
- F1A(R32_USCALED, NONE, C0, xx, xx, xx, USCALED, 32, V),
-
- C4A(R16G16B16A16_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 16_16_16_16, V, 0),
- C4A(R16G16B16A16_USCALED, NONE, C0, C1, C2, C3, USCALED, 16_16_16_16, V, 0),
- F3A(R16G16B16_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 16_16_16, V),
- F3A(R16G16B16_UNORM, NONE, C0, C1, C2, xx, UNORM, 16_16_16, V),
- F3A(R16G16B16_SNORM, NONE, C0, C1, C2, xx, SNORM, 16_16_16, V),
- I3A(R16G16B16_SINT, NONE, C0, C1, C2, xx, SINT, 16_16_16, V),
- I3A(R16G16B16_UINT, NONE, C0, C1, C2, xx, UINT, 16_16_16, V),
- F3A(R16G16B16_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 16_16_16, V),
- F3A(R16G16B16_USCALED, NONE, C0, C1, C2, xx, USCALED, 16_16_16, V),
- F2A(R16G16_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 16_16, V),
- F2A(R16G16_USCALED, NONE, C0, C1, xx, xx, USCALED, 16_16, V),
- F1A(R16_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 16, V),
- F1A(R16_USCALED, NONE, C0, xx, xx, xx, USCALED, 16, V),
-
- C4A(R10G10B10A2_USCALED, NONE, C0, C1, C2, C3, USCALED, 10_10_10_2, V, 0),
- C4A(R10G10B10A2_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 10_10_10_2, V, 0),
- C4A(B10G10R10A2_USCALED, NONE, C0, C1, C2, C3, USCALED, 10_10_10_2, V, 1),
- C4A(B10G10R10A2_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 10_10_10_2, V, 1),
-
- C4A(R8G8B8A8_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 8_8_8_8, V, 0),
- C4A(R8G8B8A8_USCALED, NONE, C0, C1, C2, C3, USCALED, 8_8_8_8, V, 0),
- F3A(R8G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, 8_8_8, V),
- F3A(R8G8B8_SNORM, NONE, C0, C1, C2, xx, SNORM, 8_8_8, V),
- I2A(R8G8B8_SINT, NONE, C0, C1, C2, xx, SINT, 8_8_8, V),
- I2A(R8G8B8_UINT, NONE, C0, C1, C2, xx, UINT, 8_8_8, V),
- F3A(R8G8B8_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 8_8_8, V),
- F3A(R8G8B8_USCALED, NONE, C0, C1, C2, xx, USCALED, 8_8_8, V),
- F2A(R8G8_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 8_8, V),
- F2A(R8G8_USCALED, NONE, C0, C1, xx, xx, USCALED, 8_8, V),
- F1A(R8_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 8, V),
- F1A(R8_USCALED, NONE, C0, xx, xx, xx, USCALED, 8, V),
+ VF(A, B8G8R8A8_UNORM, UNORM, 8_8_8_8, 1),
+ VF(A, R8G8B8A8_UNORM, UNORM, 8_8_8_8, 0),
+
+ VF(A, R10G10B10A2_UNORM, UNORM, 10_10_10_2, 0),
+ VF(A, B10G10R10A2_UNORM, UNORM, 10_10_10_2, 1),
+ VF(A, R10G10B10A2_SNORM, SNORM, 10_10_10_2, 0),
+ VF(A, B10G10R10A2_SNORM, SNORM, 10_10_10_2, 1),
+ VF(A, R10G10B10A2_UINT, UINT, 10_10_10_2, 0),
+ VF(A, B10G10R10A2_UINT, UINT, 10_10_10_2, 0),
+
+ VF(A, R11G11B10_FLOAT, FLOAT, 11_11_10, 0),
+
+ VF(A, R32G32B32A32_FLOAT, FLOAT, 32_32_32_32, 0),
+ VF(A, R32G32B32A32_UNORM, UNORM, 32_32_32_32, 0),
+ VF(A, R32G32B32A32_SNORM, SNORM, 32_32_32_32, 0),
+ VF(A, R32G32B32A32_SINT, SINT, 32_32_32_32, 0),
+ VF(A, R32G32B32A32_UINT, UINT, 32_32_32_32, 0),
+
+ VF(A, R32G32_FLOAT, FLOAT, 32_32, 0),
+ VF(A, R32G32_UNORM, UNORM, 32_32, 0),
+ VF(A, R32G32_SNORM, SNORM, 32_32, 0),
+ VF(A, R32G32_SINT, SINT, 32_32, 0),
+ VF(A, R32G32_UINT, UINT, 32_32, 0),
+
+ VF(A, R32_FLOAT, FLOAT, 32, 0),
+ VF(A, R32_UNORM, UNORM, 32, 0),
+ VF(A, R32_SNORM, SNORM, 32, 0),
+ VF(A, R32_SINT, SINT, 32, 0),
+ VF(A, R32_UINT, UINT, 32, 0),
+
+ VF(A, R16G16B16A16_FLOAT, FLOAT, 16_16_16_16, 0),
+ VF(A, R16G16B16A16_UNORM, UNORM, 16_16_16_16, 0),
+ VF(A, R16G16B16A16_SNORM, SNORM, 16_16_16_16, 0),
+ VF(A, R16G16B16A16_SINT, SINT, 16_16_16_16, 0),
+ VF(A, R16G16B16A16_UINT, UINT, 16_16_16_16, 0),
+
+ VF(A, R16G16_FLOAT, FLOAT, 16_16, 0),
+ VF(A, R16G16_UNORM, UNORM, 16_16, 0),
+ VF(A, R16G16_SNORM, SNORM, 16_16, 0),
+ VF(A, R16G16_SINT, SINT, 16_16, 0),
+ VF(A, R16G16_UINT, UINT, 16_16, 0),
+
+ VF(A, R16_FLOAT, FLOAT, 16, 0),
+ VF(A, R16_UNORM, UNORM, 16, 0),
+ VF(A, R16_SNORM, SNORM, 16, 0),
+ VF(A, R16_SINT, SINT, 16, 0),
+ VF(A, R16_UINT, UINT, 16, 0),
+
+ VF(A, R8G8B8A8_SNORM, SNORM, 8_8_8_8, 0),
+ VF(A, R8G8B8A8_SINT, SINT, 8_8_8_8, 0),
+ VF(A, R8G8B8A8_UINT, UINT, 8_8_8_8, 0),
+
+ VF(A, R8G8_UNORM, UNORM, 8_8, 0),
+ VF(A, R8G8_SNORM, SNORM, 8_8, 0),
+ VF(A, R8G8_SINT, SINT, 8_8, 0),
+ VF(A, R8G8_UINT, UINT, 8_8, 0),
+
+ VF(A, R8_UNORM, UNORM, 8, 0),
+ VF(A, R8_SNORM, SNORM, 8, 0),
+ VF(A, R8_SINT, SINT, 8, 0),
+ VF(A, R8_UINT, UINT, 8, 0),
+
+ VF(A, R32G32B32A32_SSCALED, SSCALED, 32_32_32_32, 0),
+ VF(A, R32G32B32A32_USCALED, USCALED, 32_32_32_32, 0),
+ VF(A, R32G32B32_FLOAT, FLOAT, 32_32_32, 0),
+ VF(A, R32G32B32_UNORM, UNORM, 32_32_32, 0),
+ VF(A, R32G32B32_SNORM, SNORM, 32_32_32, 0),
+ VF(A, R32G32B32_SINT, SINT, 32_32_32, 0),
+ VF(A, R32G32B32_UINT, UINT, 32_32_32, 0),
+ VF(A, R32G32B32_SSCALED, SSCALED, 32_32_32, 0),
+ VF(A, R32G32B32_USCALED, USCALED, 32_32_32, 0),
+ VF(A, R32G32_SSCALED, SSCALED, 32_32, 0),
+ VF(A, R32G32_USCALED, USCALED, 32_32, 0),
+ VF(A, R32_SSCALED, SSCALED, 32, 0),
+ VF(A, R32_USCALED, USCALED, 32, 0),
+
+ VF(A, R16G16B16A16_SSCALED, SSCALED, 16_16_16_16, 0),
+ VF(A, R16G16B16A16_USCALED, USCALED, 16_16_16_16, 0),
+ VF(A, R16G16B16_FLOAT, FLOAT, 16_16_16, 0),
+ VF(A, R16G16B16_UNORM, UNORM, 16_16_16, 0),
+ VF(A, R16G16B16_SNORM, SNORM, 16_16_16, 0),
+ VF(A, R16G16B16_SINT, SINT, 16_16_16, 0),
+ VF(A, R16G16B16_UINT, UINT, 16_16_16, 0),
+ VF(A, R16G16B16_SSCALED, SSCALED, 16_16_16, 0),
+ VF(A, R16G16B16_USCALED, USCALED, 16_16_16, 0),
+ VF(A, R16G16_SSCALED, SSCALED, 16_16, 0),
+ VF(A, R16G16_USCALED, USCALED, 16_16, 0),
+ VF(A, R16_SSCALED, SSCALED, 16, 0),
+ VF(A, R16_USCALED, USCALED, 16, 0),
+
+ VF(A, R10G10B10A2_USCALED, USCALED, 10_10_10_2, 0),
+ VF(A, R10G10B10A2_SSCALED, SSCALED, 10_10_10_2, 0),
+ VF(A, B10G10R10A2_USCALED, USCALED, 10_10_10_2, 1),
+ VF(A, B10G10R10A2_SSCALED, SSCALED, 10_10_10_2, 1),
+
+ VF(A, R8G8B8A8_SSCALED, SSCALED, 8_8_8_8, 0),
+ VF(A, R8G8B8A8_USCALED, USCALED, 8_8_8_8, 0),
+ VF(A, R8G8B8_UNORM, UNORM, 8_8_8, 0),
+ VF(A, R8G8B8_SNORM, SNORM, 8_8_8, 0),
+ VF(A, R8G8B8_SINT, SINT, 8_8_8, 0),
+ VF(A, R8G8B8_UINT, UINT, 8_8_8, 0),
+ VF(A, R8G8B8_SSCALED, SSCALED, 8_8_8, 0),
+ VF(A, R8G8B8_USCALED, USCALED, 8_8_8, 0),
+ VF(A, R8G8_SSCALED, SSCALED, 8_8, 0),
+ VF(A, R8G8_USCALED, USCALED, 8_8, 0),
+ VF(A, R8_SSCALED, SSCALED, 8, 0),
+ VF(A, R8_USCALED, USCALED, 8, 0),
/* FIXED types: not supported natively, converted on VBO push */
- C4B(R32G32B32A32_FIXED, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V),
- F3B(R32G32B32_FIXED, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V),
- F2B(R32G32_FIXED, NONE, C0, C1, xx, xx, FLOAT, 32_32, V),
- F1B(R32_FIXED, NONE, C0, xx, xx, xx, FLOAT, 32, V),
+ VF(x, R32G32B32A32_FIXED, xx, xx, xx),
+ VF(x, R32G32B32_FIXED, xx, xx, xx),
+ VF(x, R32G32_FIXED, xx, xx, xx),
+ VF(x, R32_FIXED, xx, xx, xx),
- C4B(R64G64B64A64_FLOAT, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V),
- F3B(R64G64B64_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V),
- F2B(R64G64_FLOAT, NONE, C0, C1, xx, xx, FLOAT, 32_32, V),
- F1B(R64_FLOAT, NONE, C0, xx, xx, xx, FLOAT, 32, V),
+ VF(x, R64G64B64A64_FLOAT, xx, xx, xx),
+ VF(x, R64G64B64_FLOAT, xx, xx, xx),
+ VF(x, R64G64_FLOAT, xx, xx, xx),
+ VF(x, R64_FLOAT, xx, xx, xx),
};
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
index 79c7023b2d4..be19c0fdc85 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -218,11 +218,12 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
struct pipe_context *pipe = &nv50->base.pipe;
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+ struct pipe_grid_info info = {};
uint32_t mask;
uint32_t input[3];
const uint block[3] = { 32, 1, 1 };
const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 };
- int c;
+ int c, i;
if (unlikely(!screen->pm.prog)) {
struct nv50_program *prog = CALLOC_STRUCT(nv50_program);
@@ -262,7 +263,14 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
pipe->bind_compute_state(pipe, screen->pm.prog);
input[0] = hq->bo->offset + hq->base_offset;
input[1] = hq->sequence;
- pipe->launch_grid(pipe, block, grid, 0, input);
+
+ for (i = 0; i < 3; i++) {
+ info.block[i] = block[i];
+ info.grid[i] = grid[i];
+ }
+ info.pc = 0;
+ info.input = input;
+ pipe->launch_grid(pipe, &info);
nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 14d0085975b..8d11dd7bf21 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -72,7 +72,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
PIPE_BIND_TRANSFER_WRITE |
PIPE_BIND_SHARED);
- return (nv50_format_table[format].usage & bindings) == bindings;
+ return (( nv50_format_table[format].usage |
+ nv50_vertex_format[format].usage) & bindings) == bindings;
}
static int
@@ -263,8 +264,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
- case PIPE_SHADER_COMPUTE:
break;
+ case PIPE_SHADER_COMPUTE:
default:
return 0;
}
@@ -315,6 +316,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -562,7 +565,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
if (screen->tesla->oclass >= NVA0_3D_CLASS) {
BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
- PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ PUSH_DATA (push, 0);
}
BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 2a4983d1020..cce92f0dd5e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -50,6 +50,7 @@ struct nv50_graph_state {
uint8_t num_samplers[3];
uint8_t prim_size;
uint16_t scissor;
+ bool seamless_cube_map;
};
struct nv50_screen {
@@ -156,12 +157,27 @@ nv50_resource_validate(struct nv04_resource *res, uint32_t flags)
struct nv50_format {
uint32_t rt;
- uint32_t tic;
+ struct {
+ unsigned format:6;
+ unsigned type_r:3;
+ unsigned type_g:3;
+ unsigned type_b:3;
+ unsigned type_a:3;
+ unsigned src_x:3;
+ unsigned src_y:3;
+ unsigned src_z:3;
+ unsigned src_w:3;
+ } tic;
+ uint32_t usage;
+};
+
+struct nv50_vertex_format {
uint32_t vtx;
uint32_t usage;
};
extern const struct nv50_format nv50_format_table[];
+extern const struct nv50_vertex_format nv50_vertex_format[];
static inline void
nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index cb040439139..6a09808807a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -21,6 +21,7 @@
*/
#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
@@ -33,7 +34,7 @@
#include "nv50/nv50_query_hw.h"
#include "nv50/nv50_3d.xml.h"
-#include "nv50/nv50_texture.xml.h"
+#include "nv50/g80_texture.xml.h"
#include "nouveau_gldefs.h"
@@ -437,24 +438,29 @@ nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
/* ====================== SAMPLERS AND TEXTURES ================================
*/
-#define NV50_TSC_WRAP_CASE(n) \
- case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
-
static inline unsigned
nv50_tsc_wrap_mode(unsigned wrap)
{
switch (wrap) {
- NV50_TSC_WRAP_CASE(REPEAT);
- NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
- NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
- NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
- NV50_TSC_WRAP_CASE(CLAMP);
- NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
- NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
- NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+ case PIPE_TEX_WRAP_REPEAT:
+ return G80_TSC_WRAP_WRAP;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return G80_TSC_WRAP_MIRROR;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return G80_TSC_WRAP_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return G80_TSC_WRAP_BORDER;
+ case PIPE_TEX_WRAP_CLAMP:
+ return G80_TSC_WRAP_CLAMP_OGL;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return G80_TSC_WRAP_MIRROR_ONCE_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return G80_TSC_WRAP_MIRROR_ONCE_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ return G80_TSC_WRAP_MIRROR_ONCE_CLAMP_OGL;
default:
NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
- return NV50_TSC_WRAP_REPEAT;
+ return G80_TSC_WRAP_WRAP;
}
}
@@ -474,42 +480,44 @@ nv50_sampler_state_create(struct pipe_context *pipe,
switch (cso->mag_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
- so->tsc[1] = NV50_TSC_1_MAGF_LINEAR;
+ so->tsc[1] = G80_TSC_1_MAG_FILTER_LINEAR;
break;
case PIPE_TEX_FILTER_NEAREST:
default:
- so->tsc[1] = NV50_TSC_1_MAGF_NEAREST;
+ so->tsc[1] = G80_TSC_1_MAG_FILTER_NEAREST;
break;
}
switch (cso->min_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
- so->tsc[1] |= NV50_TSC_1_MINF_LINEAR;
+ so->tsc[1] |= G80_TSC_1_MIN_FILTER_LINEAR;
break;
case PIPE_TEX_FILTER_NEAREST:
default:
- so->tsc[1] |= NV50_TSC_1_MINF_NEAREST;
+ so->tsc[1] |= G80_TSC_1_MIN_FILTER_NEAREST;
break;
}
switch (cso->min_mip_filter) {
case PIPE_TEX_MIPFILTER_LINEAR:
- so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR;
+ so->tsc[1] |= G80_TSC_1_MIP_FILTER_LINEAR;
break;
case PIPE_TEX_MIPFILTER_NEAREST:
- so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
+ so->tsc[1] |= G80_TSC_1_MIP_FILTER_NEAREST;
break;
case PIPE_TEX_MIPFILTER_NONE:
default:
- so->tsc[1] |= NV50_TSC_1_MIPF_NONE;
+ so->tsc[1] |= G80_TSC_1_MIP_FILTER_NONE;
break;
}
if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) {
if (cso->seamless_cube_map)
- so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS;
+ so->tsc[1] |= GK104_TSC_1_CUBEMAP_INTERFACE_FILTERING;
if (!cso->normalized_coords)
- so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS;
+ so->tsc[1] |= GK104_TSC_1_FLOAT_COORD_NORMALIZATION_FORCE_UNNORMALIZED_COORDS;
+ } else {
+ so->seamless_cube_map = cso->seamless_cube_map;
}
if (cso->max_anisotropy >= 16)
@@ -521,10 +529,10 @@ nv50_sampler_state_create(struct pipe_context *pipe,
so->tsc[0] |= (cso->max_anisotropy >> 1) << 20;
if (cso->max_anisotropy >= 4)
- so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35;
+ so->tsc[1] |= 6 << G80_TSC_1_TRILIN_OPT__SHIFT;
else
if (cso->max_anisotropy >= 2)
- so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15;
+ so->tsc[1] |= 4 << G80_TSC_1_TRILIN_OPT__SHIFT;
}
if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
@@ -936,21 +944,10 @@ nv50_set_framebuffer_state(struct pipe_context *pipe,
const struct pipe_framebuffer_state *fb)
{
struct nv50_context *nv50 = nv50_context(pipe);
- unsigned i;
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
- for (i = 0; i < fb->nr_cbufs; ++i)
- pipe_surface_reference(&nv50->framebuffer.cbufs[i], fb->cbufs[i]);
- for (; i < nv50->framebuffer.nr_cbufs; ++i)
- pipe_surface_reference(&nv50->framebuffer.cbufs[i], NULL);
-
- nv50->framebuffer.nr_cbufs = fb->nr_cbufs;
-
- nv50->framebuffer.width = fb->width;
- nv50->framebuffer.height = fb->height;
-
- pipe_surface_reference(&nv50->framebuffer.zsbuf, fb->zsbuf);
+ util_copy_framebuffer_state(&nv50->framebuffer, fb);
nv50->dirty |= NV50_NEW_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 4af969997f2..55369781606 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -2,7 +2,6 @@
#include "util/u_format.h"
#include "nv50/nv50_context.h"
-#include "nv50/nv50_defs.xml.h"
static inline void
nv50_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
index e0793bb6ec4..6bc451450b1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
@@ -7,6 +7,7 @@
struct nv50_tsc_entry {
int id;
uint32_t tsc[8];
+ bool seamless_cube_map;
};
static inline struct nv50_tsc_entry *
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index c69fa5abb98..4db73cb7fef 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -37,8 +37,8 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_resource.h"
-#include "nv50/nv50_defs.xml.h"
-#include "nv50/nv50_texture.xml.h"
+#include "nv50/g80_defs.xml.h"
+#include "nv50/g80_texture.xml.h"
/* these are used in nv50_blit.h */
#define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL
@@ -64,15 +64,15 @@ nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
switch (util_format_get_blocksize(format)) {
case 1:
- return NV50_SURFACE_FORMAT_R8_UNORM;
+ return G80_SURFACE_FORMAT_R8_UNORM;
case 2:
- return NV50_SURFACE_FORMAT_R16_UNORM;
+ return G80_SURFACE_FORMAT_R16_UNORM;
case 4:
- return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+ return G80_SURFACE_FORMAT_BGRA8_UNORM;
case 8:
- return NV50_SURFACE_FORMAT_RGBA16_FLOAT;
+ return G80_SURFACE_FORMAT_RGBA16_FLOAT;
case 16:
- return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
+ return G80_SURFACE_FORMAT_RGBA32_FLOAT;
default:
return 0;
}
@@ -628,7 +628,7 @@ nv50_clear_buffer_push(struct pipe_context *pipe,
offset &= ~0xff;
BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
PUSH_DATA (push, 262144);
@@ -638,7 +638,7 @@ nv50_clear_buffer_push(struct pipe_context *pipe,
PUSH_DATA (push, buf->address + offset);
BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
PUSH_DATA (push, 0);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
PUSH_DATA (push, size);
PUSH_DATA (push, 1);
@@ -997,12 +997,14 @@ nv50_blitter_make_sampler(struct nv50_blitter *blit)
blit->sampler[0].id = -1;
- blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT);
+ blit->sampler[0].tsc[0] = G80_TSC_0_SRGB_CONVERSION |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_U__SHIFT) |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_V__SHIFT) |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_P__SHIFT);
blit->sampler[0].tsc[1] =
- NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE;
+ G80_TSC_1_MAG_FILTER_NEAREST |
+ G80_TSC_1_MIN_FILTER_NEAREST |
+ G80_TSC_1_MIP_FILTER_NONE;
/* clamp to edge, min/max lod = 0, bilinear filtering */
@@ -1010,7 +1012,9 @@ nv50_blitter_make_sampler(struct nv50_blitter *blit)
blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0];
blit->sampler[1].tsc[1] =
- NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE;
+ G80_TSC_1_MAG_FILTER_LINEAR |
+ G80_TSC_1_MIN_FILTER_LINEAR |
+ G80_TSC_1_MIP_FILTER_NONE;
}
unsigned
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
index c3f433608df..4b69c3bd504 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -22,32 +22,24 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_resource.h"
-#include "nv50/nv50_texture.xml.h"
-#include "nv50/nv50_defs.xml.h"
+#include "nv50/g80_texture.xml.h"
+#include "nv50/g80_defs.xml.h"
#include "util/u_format.h"
-#define NV50_TIC_0_SWIZZLE__MASK \
- (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
- NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
-
static inline uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int)
+nv50_tic_swizzle(const struct nv50_format *fmt, unsigned swz, bool tex_int)
{
switch (swz) {
- case PIPE_SWIZZLE_RED:
- return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
- case PIPE_SWIZZLE_GREEN:
- return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
- case PIPE_SWIZZLE_BLUE:
- return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
- case PIPE_SWIZZLE_ALPHA:
- return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
+ case PIPE_SWIZZLE_RED : return fmt->tic.src_x;
+ case PIPE_SWIZZLE_GREEN: return fmt->tic.src_y;
+ case PIPE_SWIZZLE_BLUE : return fmt->tic.src_z;
+ case PIPE_SWIZZLE_ALPHA: return fmt->tic.src_w;
case PIPE_SWIZZLE_ONE:
- return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT;
+ return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
case PIPE_SWIZZLE_ZERO:
default:
- return NV50_TIC_MAP_ZERO;
+ return G80_TIC_SOURCE_ZERO;
}
}
@@ -73,6 +65,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
{
const uint32_t class_3d = nouveau_context(pipe)->screen->class_3d;
const struct util_format_description *desc;
+ const struct nv50_format *fmt;
uint64_t addr;
uint32_t *tic;
uint32_t swz[4];
@@ -100,19 +93,23 @@ nv50_create_texture_view(struct pipe_context *pipe,
/* TIC[0] */
- tic[0] = nv50_format_table[view->pipe.format].tic;
+ fmt = &nv50_format_table[view->pipe.format];
tex_int = util_format_is_pure_integer(view->pipe.format);
- swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
- swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
- swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
- swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
- tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
- (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
- (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
- (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
- (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+ swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
+ tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
+ (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
+ (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
+ (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
+ (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
+ (swz[3] << G80_TIC_0_W_SOURCE__SHIFT);
addr = mt->base.address;
@@ -124,24 +121,24 @@ nv50_create_texture_view(struct pipe_context *pipe,
depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
}
- tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER;
+ tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+ tic[2] |= G80_TIC_2_SRGB_CONVERSION;
if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
- tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+ tic[2] |= G80_TIC_2_NORMALIZED_COORDS;
if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
if (target == PIPE_BUFFER) {
addr += view->pipe.u.buf.first_element * desc->block.bits / 8;
- tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER;
+ tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
tic[3] = 0;
tic[4] = /* width */
view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1;
tic[5] = 0;
} else {
- tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT;
+ tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
tic[3] = mt->level[0].pitch;
tic[4] = mt->base.base.width0;
tic[5] = (1 << 16) | (mt->base.base.height0);
@@ -162,34 +159,34 @@ nv50_create_texture_view(struct pipe_context *pipe,
switch (target) {
case PIPE_TEXTURE_1D:
- tic[2] |= NV50_TIC_2_TARGET_1D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
break;
case PIPE_TEXTURE_2D:
- tic[2] |= NV50_TIC_2_TARGET_2D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
break;
case PIPE_TEXTURE_RECT:
- tic[2] |= NV50_TIC_2_TARGET_RECT;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
break;
case PIPE_TEXTURE_3D:
- tic[2] |= NV50_TIC_2_TARGET_3D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
break;
case PIPE_TEXTURE_CUBE:
depth /= 6;
- tic[2] |= NV50_TIC_2_TARGET_CUBE;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
break;
case PIPE_TEXTURE_1D_ARRAY:
- tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
break;
case PIPE_TEXTURE_2D_ARRAY:
- tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
break;
case PIPE_TEXTURE_CUBE_ARRAY:
depth /= 6;
- tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
break;
case PIPE_BUFFER:
assert(0); /* should be linear and handled above ! */
- tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER | G80_TIC_2_LAYOUT_PITCH;
break;
default:
unreachable("unexpected/invalid texture target");
@@ -202,9 +199,9 @@ nv50_create_texture_view(struct pipe_context *pipe,
tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff;
tic[5] |= depth << 16;
if (class_3d > NV50_3D_CLASS)
- tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+ tic[5] |= mt->base.base.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT;
else
- tic[5] |= view->pipe.u.tex.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+ tic[5] |= view->pipe.u.tex.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT;
tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */
@@ -213,9 +210,9 @@ nv50_create_texture_view(struct pipe_context *pipe,
else
tic[7] = 0;
- if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)))
+ if (unlikely(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS)))
if (mt->base.base.last_level)
- tic[5] &= ~NV50_TIC_5_LAST_LEVEL__MASK;
+ tic[5] &= ~G80_TIC_5_MAP_MIP_LEVEL__MASK;
return &view->pipe;
}
@@ -265,7 +262,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
PUSH_DATA (push, 262144);
@@ -275,7 +272,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
PUSH_DATA (push, txc->offset);
BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
PUSH_DATA (push, 0);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
PUSH_DATA (push, 32);
PUSH_DATA (push, 1);
@@ -364,6 +361,7 @@ nv50_validate_tsc(struct nv50_context *nv50, int s)
PUSH_DATA (push, (i << 4) | 0);
continue;
}
+ nv50->seamless_cube_map = tsc->seamless_cube_map;
if (tsc->id < 0) {
tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h
deleted file mode 100644
index a2b9921f647..00000000000
--- a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h
+++ /dev/null
@@ -1,306 +0,0 @@
-#ifndef NV50_TEXTURE_XML
-#define NV50_TEXTURE_XML
-
-/* Autogenerated file, DO NOT EDIT manually!
-
-This file was generated by the rules-ng-ng headergen tool in this git repository:
-http://github.com/envytools/envytools/
-git clone https://github.com/envytools/envytools.git
-
-The rules-ng-ng source files this header was generated from are:
-- rnndb/graph/g80_texture.xml ( 8881 bytes, from 2014-09-25 06:32:11)
-- rnndb/copyright.xml ( 6452 bytes, from 2013-05-14 03:57:49)
-- rnndb/nvchipsets.xml ( 2759 bytes, from 2014-10-05 01:51:02)
-- rnndb/g80_defs.xml ( 18175 bytes, from 2014-09-25 06:32:11)
-
-Copyright (C) 2006-2014 by the following authors:
-- Artur Huillet <[email protected]> (ahuillet)
-- Ben Skeggs (darktama, darktama_)
-- B. R. <[email protected]> (koala_br)
-- Carlos Martin <[email protected]> (carlosmn)
-- Christoph Bumiller <[email protected]> (calim, chrisbmr)
-- Dawid Gajownik <[email protected]> (gajownik)
-- Dmitry Baryshkov
-- Dmitry Eremin-Solenikov <[email protected]> (lumag)
-- EdB <[email protected]> (edb_)
-- Erik Waling <[email protected]> (erikwaling)
-- Francisco Jerez <[email protected]> (curro)
-- imirkin <[email protected]> (imirkin)
-- jb17bsome <[email protected]> (jb17bsome)
-- Jeremy Kolb <[email protected]> (kjeremy)
-- Laurent Carlier <[email protected]> (lordheavy)
-- Luca Barbieri <[email protected]> (lb, lb1)
-- Maarten Maathuis <[email protected]> (stillunknown)
-- Marcin KoÅ›cielnicki <[email protected]> (mwk, koriakin)
-- Mark Carey <[email protected]> (careym)
-- Matthieu Castet <[email protected]> (mat-c)
-- nvidiaman <[email protected]> (nvidiaman)
-- Patrice Mandin <[email protected]> (pmandin, pmdata)
-- Pekka Paalanen <[email protected]> (pq, ppaalanen)
-- Peter Popov <[email protected]> (ironpeter)
-- Richard Hughes <[email protected]> (hughsient)
-- Rudi Cilibrasi <[email protected]> (cilibrar)
-- Serge Martin
-- Simon Raffeiner
-- Stephane Loeuillet <[email protected]> (leroutier)
-- Stephane Marchesin <[email protected]> (marcheu)
-- sturmflut <[email protected]> (sturmflut)
-- Sylvain Munaut <[email protected]>
-- Victor Stinner <[email protected]> (haypo)
-- Wladmir van der Laan <[email protected]> (miathan6)
-- Younes Manton <[email protected]> (ymanton)
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-
-#define NV50_TIC_MAP_ZERO 0x00000000
-#define NV50_TIC_MAP_C0 0x00000002
-#define NV50_TIC_MAP_C1 0x00000003
-#define NV50_TIC_MAP_C2 0x00000004
-#define NV50_TIC_MAP_C3 0x00000005
-#define NV50_TIC_MAP_ONE_INT 0x00000006
-#define NV50_TIC_MAP_ONE_FLOAT 0x00000007
-#define NV50_TIC_TYPE_SNORM 0x00000001
-#define NV50_TIC_TYPE_UNORM 0x00000002
-#define NV50_TIC_TYPE_SINT 0x00000003
-#define NV50_TIC_TYPE_UINT 0x00000004
-#define NV50_TIC_TYPE_SSCALED 0x00000005
-#define NV50_TIC_TYPE_USCALED 0x00000006
-#define NV50_TIC_TYPE_FLOAT 0x00000007
-#define NV50_TSC_WRAP_REPEAT 0x00000000
-#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001
-#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002
-#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003
-#define NV50_TSC_WRAP_CLAMP 0x00000004
-#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005
-#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006
-#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007
-#define NV50_TIC__SIZE 0x00000020
-#define NV50_TIC_0 0x00000000
-#define NV50_TIC_0_MAPA__MASK 0x38000000
-#define NV50_TIC_0_MAPA__SHIFT 27
-#define NV50_TIC_0_MAPB__MASK 0x07000000
-#define NV50_TIC_0_MAPB__SHIFT 24
-#define NV50_TIC_0_MAPG__MASK 0x00e00000
-#define NV50_TIC_0_MAPG__SHIFT 21
-#define NV50_TIC_0_MAPR__MASK 0x001c0000
-#define NV50_TIC_0_MAPR__SHIFT 18
-#define NV50_TIC_0_TYPE3__MASK 0x00038000
-#define NV50_TIC_0_TYPE3__SHIFT 15
-#define NV50_TIC_0_TYPE2__MASK 0x00007000
-#define NV50_TIC_0_TYPE2__SHIFT 12
-#define NV50_TIC_0_TYPE1__MASK 0x00000e00
-#define NV50_TIC_0_TYPE1__SHIFT 9
-#define NV50_TIC_0_TYPE0__MASK 0x000001c0
-#define NV50_TIC_0_TYPE0__SHIFT 6
-#define NV50_TIC_0_FMT__MASK 0x0000003f
-#define NV50_TIC_0_FMT__SHIFT 0
-#define NV50_TIC_0_FMT_32_32_32_32 0x00000001
-#define NVC0_TIC_0_FMT_32_32_32 0x00000002
-#define NV50_TIC_0_FMT_16_16_16_16 0x00000003
-#define NV50_TIC_0_FMT_32_32 0x00000004
-#define NV50_TIC_0_FMT_32_8_X24 0x00000005
-#define NV50_TIC_0_FMT_8_8_8_8 0x00000008
-#define NV50_TIC_0_FMT_10_10_10_2 0x00000009
-#define NV50_TIC_0_FMT_16_16 0x0000000c
-#define NV50_TIC_0_FMT_24_8 0x0000000d
-#define NV50_TIC_0_FMT_8_24 0x0000000e
-#define NV50_TIC_0_FMT_32 0x0000000f
-#define NVC0_TIC_0_FMT_BPTC_FLOAT 0x00000010
-#define NVC0_TIC_0_FMT_BPTC_UFLOAT 0x00000011
-#define NV50_TIC_0_FMT_4_4_4_4 0x00000012
-#define NV50_TIC_0_FMT_1_5_5_5 0x00000013
-#define NV50_TIC_0_FMT_5_5_5_1 0x00000014
-#define NV50_TIC_0_FMT_5_6_5 0x00000015
-#define NV50_TIC_0_FMT_5_5_6 0x00000016
-#define NVC0_TIC_0_FMT_BPTC 0x00000017
-#define NV50_TIC_0_FMT_8_8 0x00000018
-#define NV50_TIC_0_FMT_16 0x0000001b
-#define NV50_TIC_0_FMT_8 0x0000001d
-#define NV50_TIC_0_FMT_4_4 0x0000001e
-#define NV50_TIC_0_FMT_BITMAP 0x0000001f
-#define NV50_TIC_0_FMT_9_9_9_E5 0x00000020
-#define NV50_TIC_0_FMT_11_11_10 0x00000021
-#define NV50_TIC_0_FMT_U8_YA8_V8_YB8 0x00000022
-#define NV50_TIC_0_FMT_YA8_U8_YB8_V8 0x00000023
-#define NV50_TIC_0_FMT_DXT1 0x00000024
-#define NV50_TIC_0_FMT_DXT3 0x00000025
-#define NV50_TIC_0_FMT_DXT5 0x00000026
-#define NV50_TIC_0_FMT_RGTC1 0x00000027
-#define NV50_TIC_0_FMT_RGTC2 0x00000028
-#define NV50_TIC_0_FMT_S8_Z24 0x00000029
-#define NV50_TIC_0_FMT_Z24_X8 0x0000002a
-#define NV50_TIC_0_FMT_Z24_S8 0x0000002b
-#define NV50_TIC_0_FMT_Z24_C8_MS4_CS4 0x0000002c
-#define NV50_TIC_0_FMT_Z24_C8_MS8_CS8 0x0000002d
-#define NV50_TIC_0_FMT_Z24_C8_MS4_CS12 0x0000002e
-#define NV50_TIC_0_FMT_Z32 0x0000002f
-#define NV50_TIC_0_FMT_Z32_S8_X24 0x00000030
-#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS4 0x00000031
-#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS8_CS8 0x00000032
-#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS4 0x00000033
-#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS8_CS8 0x00000034
-#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS4 0x00000035
-#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS8_CS8 0x00000036
-#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS12 0x00000037
-#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS12 0x00000038
-#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS12 0x00000039
-#define NV50_TIC_0_FMT_Z16 0x0000003a
-
-#define NV50_TIC_1 0x00000004
-#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff
-#define NV50_TIC_1_OFFSET_LOW__SHIFT 0
-
-#define NV50_TIC_2 0x00000008
-#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff
-#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0
-#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400
-#define NV50_TIC_2_TARGET__MASK 0x0003c000
-#define NV50_TIC_2_TARGET__SHIFT 14
-#define NV50_TIC_2_TARGET_1D 0x00000000
-#define NV50_TIC_2_TARGET_2D 0x00004000
-#define NV50_TIC_2_TARGET_3D 0x00008000
-#define NV50_TIC_2_TARGET_CUBE 0x0000c000
-#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000
-#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000
-#define NV50_TIC_2_TARGET_BUFFER 0x00018000
-#define NV50_TIC_2_TARGET_RECT 0x0001c000
-#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000
-#define NV50_TIC_2_LINEAR 0x00040000
-#define NV50_TIC_2_TILE_MODE_X__MASK 0x00380000
-#define NV50_TIC_2_TILE_MODE_X__SHIFT 19
-#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000
-#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22
-#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000
-#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25
-#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000
-#define NV50_TIC_2_2D_UNK0258__SHIFT 28
-#define NV50_TIC_2_NO_BORDER 0x40000000
-#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000
-
-#define NV50_TIC_3 0x0000000c
-#define NV50_TIC_3_PITCH__MASK 0xffffffff
-#define NV50_TIC_3_PITCH__SHIFT 0
-
-#define NV50_TIC_4 0x00000010
-#define NV50_TIC_4_WIDTH__MASK 0xffffffff
-#define NV50_TIC_4_WIDTH__SHIFT 0
-
-#define NV50_TIC_5 0x00000014
-#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000
-#define NV50_TIC_5_LAST_LEVEL__SHIFT 28
-#define NV50_TIC_5_DEPTH__MASK 0x0fff0000
-#define NV50_TIC_5_DEPTH__SHIFT 16
-#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff
-#define NV50_TIC_5_HEIGHT__SHIFT 0
-
-#define NV50_TIC_7 0x0000001c
-#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f
-#define NV50_TIC_7_BASE_LEVEL__SHIFT 0
-#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0
-#define NV50_TIC_7_MAX_LEVEL__SHIFT 4
-#define NV50_TIC_7_MS_MODE__MASK 0x0000f000
-#define NV50_TIC_7_MS_MODE__SHIFT 12
-#define NV50_TIC_7_MS_MODE_MS1 0x00000000
-#define NV50_TIC_7_MS_MODE_MS2 0x00001000
-#define NV50_TIC_7_MS_MODE_MS4 0x00002000
-#define NV50_TIC_7_MS_MODE_MS8 0x00003000
-#define NVA3_TIC_7_MS_MODE_MS8_ALT 0x00004000
-#define NVA3_TIC_7_MS_MODE_MS2_ALT 0x00005000
-#define NVC0_TIC_7_MS_MODE_UNK6 0x00006000
-#define NV50_TIC_7_MS_MODE_MS4_CS4 0x00008000
-#define NV50_TIC_7_MS_MODE_MS4_CS12 0x00009000
-#define NV50_TIC_7_MS_MODE_MS8_CS8 0x0000a000
-#define NVC0_TIC_7_MS_MODE_MS8_CS24 0x0000b000
-
-#define NV50_TSC__SIZE 0x00000020
-#define NV50_TSC_0 0x00000000
-#define NV50_TSC_0_WRAPS__MASK 0x00000007
-#define NV50_TSC_0_WRAPS__SHIFT 0
-#define NV50_TSC_0_WRAPT__MASK 0x00000038
-#define NV50_TSC_0_WRAPT__SHIFT 3
-#define NV50_TSC_0_WRAPR__MASK 0x000001c0
-#define NV50_TSC_0_WRAPR__SHIFT 6
-#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200
-#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00
-#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10
-#define NV50_TSC_0_SRGB_CONVERSION_ALLOWED 0x00002000
-#define NV50_TSC_0_BOX_S__MASK 0x0001c000
-#define NV50_TSC_0_BOX_S__SHIFT 14
-#define NV50_TSC_0_BOX_T__MASK 0x000e0000
-#define NV50_TSC_0_BOX_T__SHIFT 17
-#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000
-#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20
-
-#define NV50_TSC_1 0x00000004
-#define NV50_TSC_1_UNKN_ANISO_15 0x10000000
-#define NV50_TSC_1_UNKN_ANISO_35 0x18000000
-#define NV50_TSC_1_MAGF__MASK 0x00000003
-#define NV50_TSC_1_MAGF__SHIFT 0
-#define NV50_TSC_1_MAGF_NEAREST 0x00000001
-#define NV50_TSC_1_MAGF_LINEAR 0x00000002
-#define NV50_TSC_1_MINF__MASK 0x00000030
-#define NV50_TSC_1_MINF__SHIFT 4
-#define NV50_TSC_1_MINF_NEAREST 0x00000010
-#define NV50_TSC_1_MINF_LINEAR 0x00000020
-#define NV50_TSC_1_MIPF__MASK 0x000000c0
-#define NV50_TSC_1_MIPF__SHIFT 6
-#define NV50_TSC_1_MIPF_NONE 0x00000040
-#define NV50_TSC_1_MIPF_NEAREST 0x00000080
-#define NV50_TSC_1_MIPF_LINEAR 0x000000c0
-#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200
-#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000
-#define NV50_TSC_1_LOD_BIAS__SHIFT 12
-#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000
-
-#define NV50_TSC_2 0x00000008
-#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff
-#define NV50_TSC_2_MIN_LOD__SHIFT 0
-#define NV50_TSC_2_MAX_LOD__MASK 0x00fff000
-#define NV50_TSC_2_MAX_LOD__SHIFT 12
-#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__MASK 0xff000000
-#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__SHIFT 24
-
-#define NV50_TSC_3 0x0000000c
-#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__MASK 0x000ff000
-#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__SHIFT 12
-#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__MASK 0x0ff00000
-#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__SHIFT 20
-
-#define NV50_TSC_4 0x00000010
-#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff
-#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0
-
-#define NV50_TSC_5 0x00000014
-#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff
-#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0
-
-#define NV50_TSC_6 0x00000018
-#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff
-#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0
-
-#define NV50_TSC_7 0x0000001c
-#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff
-#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0
-
-
-#endif /* NV50_TEXTURE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
index 9a3fd1e705f..86a8c159469 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -3,7 +3,7 @@
#include "nv50/nv50_context.h"
-#include "nv50/nv50_defs.xml.h"
+#include "nv50/g80_defs.xml.h"
struct nv50_transfer {
struct pipe_transfer base;
@@ -163,7 +163,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
offset &= ~0xff;
BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
PUSH_DATA (push, 262144);
@@ -173,7 +173,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
PUSH_DATA (push, dst->offset + offset);
BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
PUSH_DATA (push, 0);
- PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);
BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
PUSH_DATA (push, size);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 5369d5207ee..6f60445d8d2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -76,7 +76,7 @@ nv50_vertex_state_create(struct pipe_context *pipe,
enum pipe_format fmt = ve->src_format;
so->element[i].pipe = elements[i];
- so->element[i].state = nv50_format_table[fmt].vtx;
+ so->element[i].state = nv50_vertex_format[fmt].vtx;
if (!so->element[i].state) {
switch (util_format_get_nr_components(fmt)) {
@@ -89,7 +89,7 @@ nv50_vertex_state_create(struct pipe_context *pipe,
FREE(so);
return NULL;
}
- so->element[i].state = nv50_format_table[fmt].vtx;
+ so->element[i].state = nv50_vertex_format[fmt].vtx;
so->need_conversion = true;
pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,
"Converting vertex element %d, no hw format %s",
@@ -816,6 +816,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, 0x20);
}
+ if (nv50->screen->base.class_3d >= NVA0_3D_CLASS &&
+ nv50->seamless_cube_map != nv50->state.seamless_cube_map) {
+ nv50->state.seamless_cube_map = nv50->seamless_cube_map;
+ BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
+ PUSH_DATA (push, nv50->seamless_cube_map ? NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0);
+ }
+
if (nv50->vbo_fifo) {
nv50_push_vbo(nv50, info);
push->kick_notify = nv50_default_kick_notify;
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index 0a0e187dc02..3479c343261 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -192,6 +192,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVF0_3D_CLASS 0x0000a197
#define NVEA_3D_CLASS 0x0000a297
#define GM107_3D_CLASS 0x0000b097
+#define GM200_3D_CLASS 0x0000b197
#define NV50_2D_CLASS 0x0000502d
#define NVC0_2D_CLASS 0x0000902d
#define NV50_COMPUTE_CLASS 0x000050c0
@@ -200,6 +201,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC8_COMPUTE_CLASS 0x000092c0
#define NVE4_COMPUTE_CLASS 0x0000a0c0
#define NVF0_COMPUTE_CLASS 0x0000a1c0
+#define GM107_COMPUTE_CLASS 0x0000b0c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
diff --git a/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h b/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h
new file mode 100644
index 00000000000..a4bc3805f26
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h
@@ -0,0 +1,365 @@
+#ifndef GM107_TEXTURE_XML
+#define GM107_TEXTURE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/gm107_texture.xml ( 22057 bytes, from 2016-02-12 03:01:43)
+- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40)
+- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-04 22:19:11)
+- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42)
+
+Copyright (C) 2006-2016 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- Ilia Mirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin KoÅ›cielnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define GM107_TIC2__SIZE 0x00000020
+#define GM107_TIC2_0 0x00000000
+#define GM107_TIC2_0_COMPONENTS_SIZES__MASK 0x0000007f
+#define GM107_TIC2_0_COMPONENTS_SIZES__SHIFT 0
+#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32_B32_A32 0x00000001
+#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32_B32 0x00000002
+#define GM107_TIC2_0_COMPONENTS_SIZES_R16_G16_B16_A16 0x00000003
+#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32 0x00000004
+#define GM107_TIC2_0_COMPONENTS_SIZES_R32_B24G8 0x00000005
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8B8G8R8 0x00000007
+#define GM107_TIC2_0_COMPONENTS_SIZES_A8B8G8R8 0x00000008
+#define GM107_TIC2_0_COMPONENTS_SIZES_A2B10G10R10 0x00000009
+#define GM107_TIC2_0_COMPONENTS_SIZES_R16_G16 0x0000000c
+#define GM107_TIC2_0_COMPONENTS_SIZES_G8R24 0x0000000d
+#define GM107_TIC2_0_COMPONENTS_SIZES_G24R8 0x0000000e
+#define GM107_TIC2_0_COMPONENTS_SIZES_R32 0x0000000f
+#define GM107_TIC2_0_COMPONENTS_SIZES_A4B4G4R4 0x00000012
+#define GM107_TIC2_0_COMPONENTS_SIZES_A5B5G5R1 0x00000013
+#define GM107_TIC2_0_COMPONENTS_SIZES_A1B5G5R5 0x00000014
+#define GM107_TIC2_0_COMPONENTS_SIZES_B5G6R5 0x00000015
+#define GM107_TIC2_0_COMPONENTS_SIZES_B6G5R5 0x00000016
+#define GM107_TIC2_0_COMPONENTS_SIZES_G8R8 0x00000018
+#define GM107_TIC2_0_COMPONENTS_SIZES_R16 0x0000001b
+#define GM107_TIC2_0_COMPONENTS_SIZES_Y8_VIDEO 0x0000001c
+#define GM107_TIC2_0_COMPONENTS_SIZES_R8 0x0000001d
+#define GM107_TIC2_0_COMPONENTS_SIZES_G4R4 0x0000001e
+#define GM107_TIC2_0_COMPONENTS_SIZES_R1 0x0000001f
+#define GM107_TIC2_0_COMPONENTS_SIZES_E5B9G9R9_SHAREDEXP 0x00000020
+#define GM107_TIC2_0_COMPONENTS_SIZES_BF10GF11RF11 0x00000021
+#define GM107_TIC2_0_COMPONENTS_SIZES_G8B8G8R8 0x00000022
+#define GM107_TIC2_0_COMPONENTS_SIZES_B8G8R8G8 0x00000023
+#define GM107_TIC2_0_COMPONENTS_SIZES_DXT1 0x00000024
+#define GM107_TIC2_0_COMPONENTS_SIZES_DXT23 0x00000025
+#define GM107_TIC2_0_COMPONENTS_SIZES_DXT45 0x00000026
+#define GM107_TIC2_0_COMPONENTS_SIZES_DXN1 0x00000027
+#define GM107_TIC2_0_COMPONENTS_SIZES_DXN2 0x00000028
+#define GM107_TIC2_0_COMPONENTS_SIZES_BC6H_SF16 0x00000010
+#define GM107_TIC2_0_COMPONENTS_SIZES_BC6H_UF16 0x00000011
+#define GM107_TIC2_0_COMPONENTS_SIZES_BC7U 0x00000017
+#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGB 0x00000006
+#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGB_PTA 0x0000000a
+#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGBA 0x0000000b
+#define GM107_TIC2_0_COMPONENTS_SIZES_EAC 0x00000019
+#define GM107_TIC2_0_COMPONENTS_SIZES_EACX2 0x0000001a
+#define GM107_TIC2_0_COMPONENTS_SIZES_Z24S8 0x00000029
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24 0x0000002a
+#define GM107_TIC2_0_COMPONENTS_SIZES_S8Z24 0x0000002b
+#define GM107_TIC2_0_COMPONENTS_SIZES_X4V4Z24__COV4R4V 0x0000002c
+#define GM107_TIC2_0_COMPONENTS_SIZES_X4V4Z24__COV8R8V 0x0000002d
+#define GM107_TIC2_0_COMPONENTS_SIZES_V8Z24__COV4R12V 0x0000002e
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32 0x0000002f
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X24S8 0x00000030
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV4R4V 0x00000031
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV8R8V 0x00000032
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV4R4V 0x00000033
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV8R8V 0x00000034
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV4R4V 0x00000035
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV8R8V 0x00000036
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV4R12V 0x00000037
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV4R12V 0x00000038
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV4R12V 0x00000039
+#define GM107_TIC2_0_COMPONENTS_SIZES_Z16 0x0000003a
+#define GM107_TIC2_0_COMPONENTS_SIZES_V8Z24__COV8R24V 0x0000003b
+#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV8R24V 0x0000003c
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV8R24V 0x0000003d
+#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV8R24V 0x0000003e
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_4X4 0x00000040
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_5X4 0x00000050
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_5X5 0x00000041
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_6X5 0x00000051
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_6X6 0x00000042
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X5 0x00000055
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X6 0x00000052
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X8 0x00000044
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X5 0x00000056
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X6 0x00000057
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X8 0x00000053
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X10 0x00000045
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_12X10 0x00000054
+#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_12X12 0x00000046
+#define GM107_TIC2_0_R_DATA_TYPE__MASK 0x00000380
+#define GM107_TIC2_0_R_DATA_TYPE__SHIFT 7
+#define GM107_TIC2_0_G_DATA_TYPE__MASK 0x00001c00
+#define GM107_TIC2_0_G_DATA_TYPE__SHIFT 10
+#define GM107_TIC2_0_B_DATA_TYPE__MASK 0x0000e000
+#define GM107_TIC2_0_B_DATA_TYPE__SHIFT 13
+#define GM107_TIC2_0_A_DATA_TYPE__MASK 0x00070000
+#define GM107_TIC2_0_A_DATA_TYPE__SHIFT 16
+#define GM107_TIC2_0_X_SOURCE__MASK 0x00380000
+#define GM107_TIC2_0_X_SOURCE__SHIFT 19
+#define GM107_TIC2_0_Y_SOURCE__MASK 0x01c00000
+#define GM107_TIC2_0_Y_SOURCE__SHIFT 22
+#define GM107_TIC2_0_Z_SOURCE__MASK 0x0e000000
+#define GM107_TIC2_0_Z_SOURCE__SHIFT 25
+#define GM107_TIC2_0_W_SOURCE__MASK 0x70000000
+#define GM107_TIC2_0_W_SOURCE__SHIFT 28
+#define GM107_TIC2_0_PACK_COMPONENTS 0x80000000
+
+#define GM107_TIC2_1 0x00000004
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_0__MASK 0xffffffff
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_0__SHIFT 0
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__MASK 0xffffffe0
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__SHIFT 5
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__SHR 5
+#define GM107_TIC2_1_GOB_DEPTH_OFFSET__MASK 0x00000060
+#define GM107_TIC2_1_GOB_DEPTH_OFFSET__SHIFT 5
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__MASK 0xfffffe00
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__SHIFT 9
+#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__SHR 9
+
+#define GM107_TIC2_2 0x00000008
+#define GM107_TIC2_2_ADDRESS_BITS_47_TO_32__MASK 0x0000ffff
+#define GM107_TIC2_2_ADDRESS_BITS_47_TO_32__SHIFT 0
+#define GM107_TIC2_2_HEADER_VERSION__MASK 0x00e00000
+#define GM107_TIC2_2_HEADER_VERSION__SHIFT 21
+#define GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER 0x00000000
+#define GM107_TIC2_2_HEADER_VERSION_PITCH_COLORKEY 0x00200000
+#define GM107_TIC2_2_HEADER_VERSION_PITCH 0x00400000
+#define GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR 0x00600000
+#define GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR_COLORKEY 0x00800000
+#define GM107_TIC2_2_RESOURCE_VIEW_COHERENCY_HASH__MASK 0x1e000000
+#define GM107_TIC2_2_RESOURCE_VIEW_COHERENCY_HASH__SHIFT 25
+
+#define GM107_TIC2_3 0x0000000c
+#define GM107_TIC2_3_WIDTH_MINUS_ONE_BITS_31_TO_16__MASK 0x0000ffff
+#define GM107_TIC2_3_WIDTH_MINUS_ONE_BITS_31_TO_16__SHIFT 0
+#define GM107_TIC2_3_PITCH_BITS_20_TO_5__MASK 0x0000ffff
+#define GM107_TIC2_3_PITCH_BITS_20_TO_5__SHIFT 0
+#define GM107_TIC2_3_PITCH_BITS_20_TO_5__SHR 5
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MASK 0x00000007
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__SHIFT 0
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MIN 0x00000000
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MAX 0x00000000
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_ONE 0x00000000
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_TWO 0x00000001
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_FOUR 0x00000002
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_EIGHT 0x00000003
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_SIXTEEN 0x00000004
+#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_THIRTYTWO 0x00000005
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT__MASK 0x00000038
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT__SHIFT 3
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_ONE 0x00000000
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_TWO 0x00000008
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_FOUR 0x00000010
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_EIGHT 0x00000018
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_SIXTEEN 0x00000020
+#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_THIRTYTWO 0x00000028
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH__MASK 0x000001c0
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH__SHIFT 6
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_ONE 0x00000000
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_TWO 0x00000040
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_FOUR 0x00000080
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_EIGHT 0x000000c0
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_SIXTEEN 0x00000100
+#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_THIRTYTWO 0x00000140
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS__MASK 0x00001c00
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS__SHIFT 10
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_ONE 0x00000000
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_TWO 0x00000400
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_FOUR 0x00000800
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_EIGHT 0x00000c00
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_SIXTEEN 0x00001000
+#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_THIRTYTWO 0x00001400
+#define GM107_TIC2_3_GOB_3D 0x00002000
+#define GM107_TIC2_3_LOD_ANISO_QUALITY_2 0x00010000
+#define GM107_TIC2_3_LOD_ANISO_QUALITY__MASK 0x00020000
+#define GM107_TIC2_3_LOD_ANISO_QUALITY__SHIFT 17
+#define GM107_TIC2_3_LOD_ANISO_QUALITY_LOW 0x00000000
+#define GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH 0x00020000
+#define GM107_TIC2_3_LOD_ISO_QUALITY__MASK 0x00040000
+#define GM107_TIC2_3_LOD_ISO_QUALITY__SHIFT 18
+#define GM107_TIC2_3_LOD_ISO_QUALITY_LOW 0x00000000
+#define GM107_TIC2_3_LOD_ISO_QUALITY_HIGH 0x00040000
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER__MASK 0x00180000
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER__SHIFT 19
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_NONE 0x00000000
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_ONE 0x00080000
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_TWO 0x00100000
+#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_SQRT 0x00180000
+#define GM107_TIC2_3_ANISO_SPREAD_SCALE__MASK 0x03e00000
+#define GM107_TIC2_3_ANISO_SPREAD_SCALE__SHIFT 21
+#define GM107_TIC2_3_USE_HEADER_OPT_CONTROL 0x04000000
+#define GM107_TIC2_3_DEPTH_TEXTURE 0x08000000
+#define GM107_TIC2_3_MAX_MIP_LEVEL__MASK 0xf0000000
+#define GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT 28
+
+#define GM107_TIC2_4 0x00000010
+#define GM107_TIC2_4_WIDTH_MINUS_ONE_BITS_15_TO_0__MASK 0x0000ffff
+#define GM107_TIC2_4_WIDTH_MINUS_ONE_BITS_15_TO_0__SHIFT 0
+#define GM107_TIC2_4_WIDTH_MINUS_ONE__MASK 0x0000ffff
+#define GM107_TIC2_4_WIDTH_MINUS_ONE__SHIFT 0
+#define GM107_TIC2_4_ANISO_SPREAD_MAX_LOG2__MASK 0x00380000
+#define GM107_TIC2_4_ANISO_SPREAD_MAX_LOG2__SHIFT 19
+#define GM107_TIC2_4_SRGB_CONVERSION 0x00400000
+#define GM107_TIC2_4_TEXTURE_TYPE__MASK 0x07800000
+#define GM107_TIC2_4_TEXTURE_TYPE__SHIFT 23
+#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D 0x00000000
+#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D 0x00800000
+#define GM107_TIC2_4_TEXTURE_TYPE_THREE_D 0x01000000
+#define GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP 0x01800000
+#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY 0x02000000
+#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY 0x02800000
+#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER 0x03000000
+#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP 0x03800000
+#define GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY 0x04000000
+#define GM107_TIC2_4_SECTOR_PROMOTION__MASK 0x18000000
+#define GM107_TIC2_4_SECTOR_PROMOTION__SHIFT 27
+#define GM107_TIC2_4_SECTOR_PROMOTION_NO_PROMOTION 0x00000000
+#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V 0x08000000
+#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_H 0x10000000
+#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_4 0x18000000
+#define GM107_TIC2_4_BORDER_SIZE__MASK 0xe0000000
+#define GM107_TIC2_4_BORDER_SIZE__SHIFT 29
+#define GM107_TIC2_4_BORDER_SIZE_ONE 0x00000000
+#define GM107_TIC2_4_BORDER_SIZE_TWO 0x20000000
+#define GM107_TIC2_4_BORDER_SIZE_FOUR 0x40000000
+#define GM107_TIC2_4_BORDER_SIZE_EIGHT 0x60000000
+#define GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR 0xe0000000
+
+#define GM107_TIC2_5 0x00000014
+#define GM107_TIC2_5_HEIGHT_MINUS_ONE__MASK 0x0000ffff
+#define GM107_TIC2_5_HEIGHT_MINUS_ONE__SHIFT 0
+#define GM107_TIC2_5_DEPTH_MINUS_ONE__MASK 0x3fff0000
+#define GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT 16
+#define GM107_TIC2_5_NORMALIZED_COORDS 0x80000000
+
+#define GM107_TIC2_6 0x00000018
+#define GM107_TIC2_6_COLOR_KEY_OP 0x00000001
+#define GM107_TIC2_6_TRILIN_OPT__MASK 0x0000003e
+#define GM107_TIC2_6_TRILIN_OPT__SHIFT 1
+#define GM107_TIC2_6_MIP_LOD_BIAS__MASK 0x0007ffc0
+#define GM107_TIC2_6_MIP_LOD_BIAS__SHIFT 6
+#define GM107_TIC2_6_MIP_LOD_BIAS__RADIX 0x00000008
+#define GM107_TIC2_6_ANISO_BIAS__MASK 0x00780000
+#define GM107_TIC2_6_ANISO_BIAS__SHIFT 19
+#define GM107_TIC2_6_ANISO_BIAS__RADIX 0x00000004
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC__MASK 0x01800000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC__SHIFT 23
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_HALF 0x00000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_ONE 0x00800000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO 0x01000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_MAX 0x01800000
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC__MASK 0x06000000
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC__SHIFT 25
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_HALF 0x00000000
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE 0x02000000
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_TWO 0x04000000
+#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_MAX 0x06000000
+#define GM107_TIC2_6_MAX_ANISOTROPY__MASK 0x38000000
+#define GM107_TIC2_6_MAX_ANISOTROPY__SHIFT 27
+#define GM107_TIC2_6_MAX_ANISOTROPY_1_TO_1 0x00000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1 0x08000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_4_TO_1 0x10000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_6_TO_1 0x18000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_8_TO_1 0x20000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_10_TO_1 0x28000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_12_TO_1 0x30000000
+#define GM107_TIC2_6_MAX_ANISOTROPY_16_TO_1 0x38000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER__MASK 0xc0000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER__SHIFT 30
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_NONE 0x00000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_ONE 0x40000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO 0x80000000
+#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_SQRT 0xc0000000
+
+#define GM107_TIC2_7 0x0000001c
+#define GM107_TIC2_7_COLOR_KEY_VALUE__MASK 0xffffffff
+#define GM107_TIC2_7_COLOR_KEY_VALUE__SHIFT 0
+#define GM107_TIC2_7_RES_VIEW_MIN_MIP_LEVEL__MASK 0x0000000f
+#define GM107_TIC2_7_RES_VIEW_MIN_MIP_LEVEL__SHIFT 0
+#define GM107_TIC2_7_RES_VIEW_MAX_MIP_LEVEL__MASK 0x000000f0
+#define GM107_TIC2_7_RES_VIEW_MAX_MIP_LEVEL__SHIFT 4
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT__MASK 0x00000f00
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT 8
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_1X1 0x00000000
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X1 0x00000100
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2 0x00000200
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2 0x00000300
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_D3D 0x00000400
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X1_D3D 0x00000500
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X4 0x00000600
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2_VC_4 0x00000800
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2_VC_12 0x00000900
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_VC_8 0x00000a00
+#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_VC_24 0x00000b00
+#define GM107_TIC2_7_MIN_LOD_CLAMP__MASK 0x00fff000
+#define GM107_TIC2_7_MIN_LOD_CLAMP__SHIFT 12
+#define GM107_TIC2_7_MIN_LOD_CLAMP__RADIX 0x00000008
+
+
+#endif /* GM107_TEXTURE_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/Makefile b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
index 1c0f5835973..52fb0a54812 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/Makefile
+++ b/src/gallium/drivers/nouveau/nvc0/mme/Makefile
@@ -1,5 +1,5 @@
ENVYAS?=envyas
-TARGETS=com9097.mme.h
+TARGETS=com9097.mme.h com90c0.mme.h
all: $(TARGETS)
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
new file mode 100644
index 00000000000..a9233ad8015
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
@@ -0,0 +1,24 @@
+/* NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT
+ *
+ * arg = num_groups_x
+ * parm[0] = num_groups_y
+ * parm[1] = num_groups_z
+ */
+.section #mme90c0_launch_grid_indirect
+ parm $r2 maddr 0x108e /* GRIDDIM_YX */
+ braz $r1 #fail
+ parm $r3
+ braz annul $r2 #fail
+ braz annul $r3 #fail
+ send (extrinsrt $r1 $r2 0x0 0x10 0x10) /* num_groups_y << 16 | num_groups_x */
+ send $r3
+ maddrsend 0xa7 /* COMPUTE_BEGIN */
+ maddrsend 0x282 /* UNKA08 */
+ maddr 0xda /* LAUNCH */
+ send 0x1000
+ maddrsend 0x281 /* COMPUTE_END */
+ exit maddr 0xd8 /* UNK360 */
+ send 0x1
+fail:
+ exit
+ nop
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
new file mode 100644
index 00000000000..1dc06e5e690
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
@@ -0,0 +1,19 @@
+uint32_t mme90c0_launch_grid_indirect[] = {
+ 0x04238251,
+ 0x00034807,
+ 0x00000301,
+/* 0x000e: fail */
+ 0x0002d027,
+ 0x00029827,
+ 0x84008842,
+ 0x00001841,
+ 0x0029c071,
+ 0x00a08071,
+ 0x00368021,
+ 0x04000041,
+ 0x00a04071,
+ 0x003600a1,
+ 0x00004041,
+ 0x00000091,
+ 0x00000011,
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 71804343138..0f1265f5db5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -59,53 +59,63 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
if (ret)
return ret;
- BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->compute->oclass);
/* hardware limit */
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
PUSH_DATA (push, screen->mp_count);
- BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
+ BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
PUSH_DATA (push, 0xf);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
+ BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
PUSH_DATA (push, 0x8000);
/* global memory setup */
- BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+ BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
PUSH_DATA (push, 0);
- BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
+ BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
for (i = 0; i <= 0xff; i++)
PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+ BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
PUSH_DATA (push, 1);
/* local memory and cstack setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->tls->offset);
PUSH_DATA (push, screen->tls->offset);
- BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
+ BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
PUSH_DATAh(push, screen->tls->size);
PUSH_DATA (push, screen->tls->size);
- BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
+ BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
- PUSH_DATA (push, 1 << 24);
+ BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
+ PUSH_DATA (push, 0xff << 24);
/* shared memory setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
+ BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
- BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
- PUSH_DATA (push, 2 << 24);
- BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
+ BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
+ PUSH_DATA (push, 0xfe << 24);
+ BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
PUSH_DATA (push, 0);
/* code segment setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->text->offset);
PUSH_DATA (push, screen->text->offset);
- /* TODO: textures & samplers */
+ /* textures */
+ BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
+
+ /* samplers */
+ BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
return 0;
}
@@ -130,7 +140,7 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0)
if (likely(prog->code_size)) {
if (nvc0_program_upload_code(nvc0, prog)) {
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
return true;
}
@@ -138,13 +148,149 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0)
return false;
}
+static void
+nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
+{
+ bool need_flush = nvc0_validate_tsc(nvc0, 5);
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+
+static void
+nvc0_compute_validate_textures(struct nvc0_context *nvc0)
+{
+ bool need_flush = nvc0_validate_tic(nvc0, 5);
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+
+static void
+nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const int s = 5;
+
+ while (nvc0->constbuf_dirty[s]) {
+ int i = ffs(nvc0->constbuf_dirty[s]) - 1;
+ nvc0->constbuf_dirty[s] &= ~(1 << i);
+
+ if (nvc0->constbuf[s][i].user) {
+ struct nouveau_bo *bo = nvc0->screen->uniform_bo;
+ const unsigned base = s << 16;
+ const unsigned size = nvc0->constbuf[s][0].size;
+ assert(i == 0); /* we really only want OpenGL uniforms here */
+ assert(nvc0->constbuf[s][0].u.data);
+
+ if (nvc0->state.uniform_buffer_bound[s] < size) {
+ nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
+
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
+ PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
+ PUSH_DATAh(push, bo->offset + base);
+ PUSH_DATA (push, bo->offset + base);
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
+ PUSH_DATA (push, (0 << 8) | 1);
+ }
+ nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
+ base, nvc0->state.uniform_buffer_bound[s],
+ 0, (size + 3) / 4,
+ nvc0->constbuf[s][0].u.data);
+ } else {
+ struct nv04_resource *res =
+ nv04_resource(nvc0->constbuf[s][i].u.buf);
+ if (res) {
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
+ PUSH_DATA (push, nvc0->constbuf[s][i].size);
+ PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
+ PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
+ PUSH_DATA (push, (i << 8) | 1);
+
+ BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
+
+ res->cb_bindings[s] |= 1 << i;
+ } else {
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
+ PUSH_DATA (push, (i << 8) | 0);
+ }
+ if (i == 0)
+ nvc0->state.uniform_buffer_bound[s] = 0;
+ }
+ }
+
+ BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
+ PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
+}
+
+static void
+nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
+ PUSH_DATA (push, (15 << 8) | 1);
+
+ nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST;
+}
+
+static void
+nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const int s = 5;
+ int i;
+
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
+ PUSH_DATA (push, 512);
+
+ for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
+ if (nvc0->buffers[s][i].buffer) {
+ struct nv04_resource *res =
+ nv04_resource(nvc0->buffers[s][i].buffer);
+ PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
+ PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
+ PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
+ PUSH_DATA (push, 0);
+ BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
+ } else {
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ }
+ }
+}
+
static bool
nvc0_compute_state_validate(struct nvc0_context *nvc0)
{
if (!nvc0_compute_validate_program(nvc0))
return false;
-
- /* TODO: textures, samplers, surfaces, global memory buffers */
+ if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF)
+ nvc0_compute_validate_constbufs(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_DRIVERCONST)
+ nvc0_compute_validate_driverconst(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_BUFFERS)
+ nvc0_compute_validate_buffers(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
+ nvc0_compute_validate_textures(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
+ nvc0_compute_validate_samplers(nvc0);
+
+ /* TODO: surfaces, global memory buffers */
nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
@@ -166,32 +312,29 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
struct nvc0_program *cp = nvc0->compprog;
if (cp->parm_size) {
- BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
PUSH_DATA (push, align(cp->parm_size, 0x100));
PUSH_DATAh(push, screen->parm->offset);
PUSH_DATA (push, screen->parm->offset);
- BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+ BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
PUSH_DATA (push, (0 << 8) | 1);
/* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
- BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
+ BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
PUSH_DATA (push, 0);
PUSH_DATAp(push, input, cp->parm_size / 4);
- BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
}
}
void
-nvc0_launch_grid(struct pipe_context *pipe,
- const uint *block_layout, const uint *grid_layout,
- uint32_t label,
- const void *input)
+nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *cp = nvc0->compprog;
- unsigned s, i;
+ unsigned s;
int ret;
ret = !nvc0_compute_state_validate(nvc0);
@@ -200,59 +343,69 @@ nvc0_launch_grid(struct pipe_context *pipe,
return;
}
- nvc0_compute_upload_input(nvc0, input);
+ nvc0_compute_upload_input(nvc0, info->input);
- BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
- PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
+ BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
+ PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
- BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
+ BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
PUSH_DATA (push, 0);
PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
- BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
+ BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
- PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
+ PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
PUSH_DATA (push, cp->num_barriers);
- BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
+ BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
PUSH_DATA (push, cp->num_gprs);
- /* grid/block setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
- PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
- PUSH_DATA (push, grid_layout[2]);
- BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
- PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
- PUSH_DATA (push, block_layout[2]);
-
/* launch preliminary setup */
- BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
+ BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
PUSH_DATA (push, 0x1);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
+ BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
- /* kernel launching */
- BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
- PUSH_DATA (push, 0x1000);
- BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
- PUSH_DATA (push, 0);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
- PUSH_DATA (push, 0x1);
+ /* block setup */
+ BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
+ PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
+ PUSH_DATA (push, info->block[2]);
+
+ if (unlikely(info->indirect)) {
+ struct nv04_resource *res = nv04_resource(info->indirect);
+ uint32_t offset = res->offset + info->indirect_offset;
+ unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;
+
+ nouveau_pushbuf_space(push, 16, 0, 1);
+ PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
+ PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
+ nouveau_pushbuf_data(push, res->bo, offset,
+ NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
+ } else {
+ /* grid setup */
+ BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
+ PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
+ PUSH_DATA (push, info->grid[2]);
+
+ /* kernel launching */
+ BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
+ PUSH_DATA (push, 0x1000);
+ BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
+ PUSH_DATA (push, 0x1);
+ }
- /* rebind all the 3D constant buffers
- * (looks like binding a CB on COMPUTE clobbers 3D state) */
- nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
+ nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
for (s = 0; s < 5; s++) {
- for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
- if (nvc0->constbuf[s][i].u.buf)
- nvc0->constbuf_dirty[s] |= 1 << i;
+ nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
+ nvc0->state.uniform_buffer_bound[s] = 0;
}
- memset(nvc0->state.uniform_buffer_bound, 0,
- sizeof(nvc0->state.uniform_buffer_bound));
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
index 168a6d1bee2..a23f7f39dda 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
@@ -1,7 +1,6 @@
#ifndef NVC0_COMPUTE_H
#define NVC0_COMPUTE_H
-#include "nv50/nv50_defs.xml.h"
#include "nvc0/nvc0_compute.xml.h"
bool
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 547b8f5d309..007cccfd10b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -194,8 +194,8 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
if (nvc0->framebuffer.cbufs[i] &&
nvc0->framebuffer.cbufs[i]->texture == res) {
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
if (!--ref)
return ref;
}
@@ -204,8 +204,8 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
if (nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.zsbuf->texture == res) {
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
if (!--ref)
return ref;
}
@@ -214,16 +214,16 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
if (res->target == PIPE_BUFFER) {
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
if (nvc0->vtxbuf[i].buffer == res) {
- nvc0->dirty |= NVC0_NEW_ARRAYS;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+ nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX);
if (!--ref)
return ref;
}
}
if (nvc0->idxbuf.buffer == res) {
- nvc0->dirty |= NVC0_NEW_IDXBUF;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX);
+ nvc0->dirty_3d |= NVC0_NEW_3D_IDXBUF;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX);
if (!--ref)
return ref;
}
@@ -233,35 +233,45 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
if (nvc0->textures[s][i] &&
nvc0->textures[s][i]->texture == res) {
nvc0->textures_dirty[s] |= 1 << i;
- nvc0->dirty |= NVC0_NEW_TEXTURES;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
if (!--ref)
return ref;
}
}
}
- for (s = 0; s < 5; ++s) {
+ for (s = 0; s < 6; ++s) {
for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; ++i) {
if (!(nvc0->constbuf_valid[s] & (1 << i)))
continue;
if (!nvc0->constbuf[s][i].user &&
nvc0->constbuf[s][i].u.buf == res) {
- nvc0->dirty |= NVC0_NEW_CONSTBUF;
nvc0->constbuf_dirty[s] |= 1 << i;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+ if (unlikely(s == 5)) {
+ nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
+ } else {
+ nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_CB(s, i));
+ }
if (!--ref)
return ref;
}
}
}
- for (s = 0; s < 5; ++s) {
+ for (s = 0; s < 6; ++s) {
for (i = 0; i < NVC0_MAX_BUFFERS; ++i) {
if (nvc0->buffers[s][i].buffer == res) {
nvc0->buffers_dirty[s] |= 1 << i;
- nvc0->dirty |= NVC0_NEW_BUFFERS;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
+ if (unlikely(s == 5)) {
+ nvc0->dirty_cp |= NVC0_NEW_CP_BUFFERS;
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BUF);
+ } else {
+ nvc0->dirty_3d |= NVC0_NEW_3D_BUFFERS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BUF);
+ }
if (!--ref)
return ref;
}
@@ -342,7 +352,12 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
if (!nvc0->tcp_empty)
goto out_err;
/* set the empty tctl prog on next draw in case one is never set */
- nvc0->dirty |= NVC0_NEW_TCTLPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TCTLPROG;
+
+ /* Do not bind the COMPUTE driver constbuf at screen initialization because
+ * CBs are aliased between 3D and COMPUTE, but make sure it will be bound if
+ * a grid is launched later. */
+ nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
/* now that there are no more opportunities for errors, set the current
* context if there isn't already one.
@@ -358,11 +373,12 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD;
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->text);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->uniform_bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->txc);
if (screen->compute) {
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text);
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->uniform_bo);
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc);
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm);
}
@@ -370,13 +386,13 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RDWR;
if (screen->poly_cache)
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->poly_cache);
if (screen->compute)
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls);
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo);
BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo);
if (screen->compute)
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 4a6ea867e85..d3e3a818910 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -29,34 +29,35 @@
#include "nvc0/nve4_p2mf.xml.h"
#include "nvc0/nvc0_macros.h"
-/* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */
-#define NVC0_NEW_BLEND (1 << 0)
-#define NVC0_NEW_RASTERIZER (1 << 1)
-#define NVC0_NEW_ZSA (1 << 2)
-#define NVC0_NEW_VERTPROG (1 << 3)
-#define NVC0_NEW_TCTLPROG (1 << 4)
-#define NVC0_NEW_TEVLPROG (1 << 5)
-#define NVC0_NEW_GMTYPROG (1 << 6)
-#define NVC0_NEW_FRAGPROG (1 << 7)
-#define NVC0_NEW_BLEND_COLOUR (1 << 8)
-#define NVC0_NEW_STENCIL_REF (1 << 9)
-#define NVC0_NEW_CLIP (1 << 10)
-#define NVC0_NEW_SAMPLE_MASK (1 << 11)
-#define NVC0_NEW_FRAMEBUFFER (1 << 12)
-#define NVC0_NEW_STIPPLE (1 << 13)
-#define NVC0_NEW_SCISSOR (1 << 14)
-#define NVC0_NEW_VIEWPORT (1 << 15)
-#define NVC0_NEW_ARRAYS (1 << 16)
-#define NVC0_NEW_VERTEX (1 << 17)
-#define NVC0_NEW_CONSTBUF (1 << 18)
-#define NVC0_NEW_TEXTURES (1 << 19)
-#define NVC0_NEW_SAMPLERS (1 << 20)
-#define NVC0_NEW_TFB_TARGETS (1 << 21)
-#define NVC0_NEW_IDXBUF (1 << 22)
-#define NVC0_NEW_SURFACES (1 << 23)
-#define NVC0_NEW_MIN_SAMPLES (1 << 24)
-#define NVC0_NEW_TESSFACTOR (1 << 25)
-#define NVC0_NEW_BUFFERS (1 << 26)
+/* NOTE: must keep NVC0_NEW_3D_...PROG in consecutive bits in this order */
+#define NVC0_NEW_3D_BLEND (1 << 0)
+#define NVC0_NEW_3D_RASTERIZER (1 << 1)
+#define NVC0_NEW_3D_ZSA (1 << 2)
+#define NVC0_NEW_3D_VERTPROG (1 << 3)
+#define NVC0_NEW_3D_TCTLPROG (1 << 4)
+#define NVC0_NEW_3D_TEVLPROG (1 << 5)
+#define NVC0_NEW_3D_GMTYPROG (1 << 6)
+#define NVC0_NEW_3D_FRAGPROG (1 << 7)
+#define NVC0_NEW_3D_BLEND_COLOUR (1 << 8)
+#define NVC0_NEW_3D_STENCIL_REF (1 << 9)
+#define NVC0_NEW_3D_CLIP (1 << 10)
+#define NVC0_NEW_3D_SAMPLE_MASK (1 << 11)
+#define NVC0_NEW_3D_FRAMEBUFFER (1 << 12)
+#define NVC0_NEW_3D_STIPPLE (1 << 13)
+#define NVC0_NEW_3D_SCISSOR (1 << 14)
+#define NVC0_NEW_3D_VIEWPORT (1 << 15)
+#define NVC0_NEW_3D_ARRAYS (1 << 16)
+#define NVC0_NEW_3D_VERTEX (1 << 17)
+#define NVC0_NEW_3D_CONSTBUF (1 << 18)
+#define NVC0_NEW_3D_TEXTURES (1 << 19)
+#define NVC0_NEW_3D_SAMPLERS (1 << 20)
+#define NVC0_NEW_3D_TFB_TARGETS (1 << 21)
+#define NVC0_NEW_3D_IDXBUF (1 << 22)
+#define NVC0_NEW_3D_SURFACES (1 << 23)
+#define NVC0_NEW_3D_MIN_SAMPLES (1 << 24)
+#define NVC0_NEW_3D_TESSFACTOR (1 << 25)
+#define NVC0_NEW_3D_BUFFERS (1 << 26)
+#define NVC0_NEW_3D_DRIVERCONST (1 << 27)
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
@@ -64,20 +65,22 @@
#define NVC0_NEW_CP_SAMPLERS (1 << 3)
#define NVC0_NEW_CP_CONSTBUF (1 << 4)
#define NVC0_NEW_CP_GLOBALS (1 << 5)
+#define NVC0_NEW_CP_DRIVERCONST (1 << 6)
+#define NVC0_NEW_CP_BUFFERS (1 << 7)
/* 3d bufctx (during draw_vbo, blit_3d) */
-#define NVC0_BIND_FB 0
-#define NVC0_BIND_VTX 1
-#define NVC0_BIND_VTX_TMP 2
-#define NVC0_BIND_IDX 3
-#define NVC0_BIND_TEX(s, i) ( 4 + 32 * (s) + (i))
-#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
-#define NVC0_BIND_TFB 244
-#define NVC0_BIND_SUF 245
-#define NVC0_BIND_BUF 246
-#define NVC0_BIND_SCREEN 247
-#define NVC0_BIND_TLS 249
-#define NVC0_BIND_3D_COUNT 250
+#define NVC0_BIND_3D_FB 0
+#define NVC0_BIND_3D_VTX 1
+#define NVC0_BIND_3D_VTX_TMP 2
+#define NVC0_BIND_3D_IDX 3
+#define NVC0_BIND_3D_TEX(s, i) ( 4 + 32 * (s) + (i))
+#define NVC0_BIND_3D_CB(s, i) (164 + 16 * (s) + (i))
+#define NVC0_BIND_3D_TFB 244
+#define NVC0_BIND_3D_SUF 245
+#define NVC0_BIND_3D_BUF 246
+#define NVC0_BIND_3D_SCREEN 247
+#define NVC0_BIND_3D_TLS 249
+#define NVC0_BIND_3D_COUNT 250
/* compute bufctx (during launch_grid) */
#define NVC0_BIND_CP_CB(i) ( 0 + (i))
@@ -87,7 +90,8 @@
#define NVC0_BIND_CP_DESC 50
#define NVC0_BIND_CP_SCREEN 51
#define NVC0_BIND_CP_QUERY 52
-#define NVC0_BIND_CP_COUNT 53
+#define NVC0_BIND_CP_BUF 53
+#define NVC0_BIND_CP_COUNT 54
/* bufctx for other operations */
#define NVC0_BIND_2D 0
@@ -114,7 +118,7 @@ struct nvc0_context {
const struct nv50_m2mf_rect *src,
uint32_t nblocksx, uint32_t nblocksy);
- uint32_t dirty;
+ uint32_t dirty_3d; /* dirty flags for 3d state */
uint32_t dirty_cp; /* dirty flags for compute state */
struct nvc0_graph_state state;
@@ -157,6 +161,7 @@ struct nvc0_context {
struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
unsigned num_samplers[6];
uint16_t samplers_dirty[6];
+ bool seamless_cube_map;
uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
@@ -267,6 +272,8 @@ extern void nvc0_clear(struct pipe_context *, unsigned buffers,
extern void nvc0_init_surface_functions(struct nvc0_context *);
/* nvc0_tex.c */
+bool nvc0_validate_tic(struct nvc0_context *nvc0, int s);
+bool nvc0_validate_tsc(struct nvc0_context *nvc0, int s);
bool nve4_validate_tsc(struct nvc0_context *nvc0, int s);
void nvc0_validate_textures(struct nvc0_context *);
void nvc0_validate_samplers(struct nvc0_context *);
@@ -331,11 +338,9 @@ nvc0_video_buffer_create(struct pipe_context *pipe,
void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
/* nve4_compute.c */
-void nve4_launch_grid(struct pipe_context *,
- const uint *, const uint *, uint32_t, const void *);
+void nve4_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
/* nvc0_compute.c */
-void nvc0_launch_grid(struct pipe_context *,
- const uint *, const uint *, uint32_t, const void *);
+void nvc0_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
index 49e176cbd49..eeacc714f3e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
@@ -35,4 +35,6 @@
#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858
+#define NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT 0x00003860
+
#endif /* __NVC0_MACROS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 93f211bd5fc..bc884d6c08f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -544,6 +544,9 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
+ } else {
+ info->io.resInfoCBSlot = 15;
+ info->io.suInfoBase = 512;
}
info->io.msInfoCBSlot = 0;
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 721857edecc..f5f9bb39fd9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -846,15 +846,15 @@ nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
/* configure and reset the counter(s) */
if (d == 0)
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_A_SIGSEL(c & 3)), 1);
else
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_B_SIGSEL(c & 3)), 1);
PUSH_DATA (push, cfg->ctr[i].sig_sel);
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_SRCSEL(c)), 1);
PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_FUNC(c)), 1);
PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_SET(c)), 1);
PUSH_DATA (push, 0);
}
return true;
@@ -917,13 +917,13 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
mask_sel &= cfg->ctr[i].src_mask;
/* configure and reset the counter(s) */
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(c)), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_PM_SIGSEL(c)), 1);
PUSH_DATA (push, cfg->ctr[i].sig_sel);
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(c)), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_PM_SRCSEL(c)), 1);
PUSH_DATA (push, cfg->ctr[i].src_sel | mask_sel);
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_PM_OP(c)), 1);
PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(c)), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_PM_SET(c)), 1);
PUSH_DATA (push, 0);
}
return true;
@@ -937,11 +937,12 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
+ struct pipe_grid_info info = {};
uint32_t mask;
uint32_t input[3];
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
const uint grid[3] = { screen->mp_count, screen->gpc_count, 1 };
- unsigned c;
+ unsigned c, i;
if (unlikely(!screen->pm.prog)) {
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
@@ -965,9 +966,9 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
for (c = 0; c < 8; ++c)
if (screen->pm.mp_counter[c]) {
if (is_nve4) {
- IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
+ IMMED_NVC0(push, NVE4_CP(MP_PM_FUNC(c)), 0);
} else {
- IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
+ IMMED_NVC0(push, NVC0_CP(MP_PM_OP(c)), 0);
}
}
/* release counters for this query */
@@ -983,13 +984,20 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
hq->bo);
PUSH_SPACE(push, 1);
- IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
+ IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0);
pipe->bind_compute_state(pipe, screen->pm.prog);
input[0] = (hq->bo->offset + hq->base_offset);
input[1] = (hq->bo->offset + hq->base_offset) >> 32;
input[2] = hq->sequence;
- pipe->launch_grid(pipe, block, grid, 0, input);
+
+ for (i = 0; i < 3; i++) {
+ info.block[i] = block[i];
+ info.grid[i] = grid[i];
+ }
+ info.pc = 0;
+ info.input = input;
+ pipe->launch_grid(pipe, &info);
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);
@@ -1010,9 +1018,9 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
break;
mask |= 1 << hsq->ctr[i];
if (is_nve4) {
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(hsq->ctr[i])), 1);
+ BEGIN_NVC0(push, NVE4_CP(MP_PM_FUNC(hsq->ctr[i])), 1);
} else {
- BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(hsq->ctr[i])), 1);
+ BEGIN_NVC0(push, NVC0_CP(MP_PM_OP(hsq->ctr[i])), 1);
}
PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index d368fda707d..998e9ea47ef 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -36,6 +36,7 @@
#include "nvc0/nvc0_screen.h"
#include "nvc0/mme/com9097.mme.h"
+#include "nvc0/mme/com90c0.mme.h"
static boolean
nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
@@ -61,7 +62,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
PIPE_BIND_TRANSFER_WRITE |
PIPE_BIND_SHARED);
- return (nvc0_format_table[format].usage & bindings) == bindings;
+ return (( nvc0_format_table[format].usage |
+ nvc0_vertex_format[format].usage) & bindings) == bindings;
}
static int
@@ -196,6 +198,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_COMPUTE:
+ if (debug_get_bool_option("NVF0_COMPUTE", false))
+ return 1;
return (class_3d <= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
@@ -262,8 +266,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 0;
break;
case PIPE_SHADER_COMPUTE:
- if (class_3d > NVE4_3D_CLASS)
- return 0;
+ if (!debug_get_bool_option("NVF0_COMPUTE", false))
+ if (class_3d > NVE4_3D_CLASS)
+ return 0;
break;
default:
return 0;
@@ -272,6 +277,10 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ if (class_3d >= NVE4_3D_CLASS)
+ return 0;
+ return 1 << PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
@@ -336,6 +345,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 16; /* XXX not sure if more are really safe */
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+ return 0;
default:
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
return 0;
@@ -598,6 +609,9 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
case 0xf0:
case 0x100:
case 0x110:
+ if (debug_get_bool_option("NVF0_COMPUTE", false))
+ return nve4_screen_compute_setup(screen, screen->base.pushbuf);
+ case 0x120:
return 0;
default:
return -1;
@@ -660,6 +674,7 @@ nvc0_screen_create(struct nouveau_device *dev)
case 0xf0:
case 0x100:
case 0x110:
+ case 0x120:
break;
default:
return NULL;
@@ -728,6 +743,7 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->nvsw->handle);
switch (dev->chipset & ~0xf) {
+ case 0x120:
case 0x110:
case 0x100:
case 0xf0:
@@ -779,6 +795,9 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->fence.bo->offset + 16);
switch (dev->chipset & ~0xf) {
+ case 0x120:
+ obj_class = GM200_3D_CLASS;
+ break;
case 0x110:
obj_class = GM107_3D_CLASS;
break;
@@ -860,8 +879,7 @@ nvc0_screen_create(struct nouveau_device *dev)
BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH);
if (screen->eng3d->oclass < NVE4_3D_CLASS) {
- BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
- PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ IMMED_NVC0(push, NVC0_3D(TEX_MISC), 0);
} else {
BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1);
PUSH_DATA (push, 15);
@@ -887,7 +905,7 @@ nvc0_screen_create(struct nouveau_device *dev)
*/
nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100);
- ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 6 << 16, NULL,
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 7 << 16, NULL,
&screen->uniform_bo);
if (ret)
goto fail;
@@ -899,8 +917,8 @@ nvc0_screen_create(struct nouveau_device *dev)
/* auxiliary constants (6 user clip planes, base instance id) */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
@@ -920,8 +938,8 @@ nvc0_screen_create(struct nouveau_device *dev)
/* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 256);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
PUSH_DATA (push, 0);
PUSH_DATAf(push, 0.0f);
@@ -929,8 +947,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f);
BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
if (screen->base.drm->version >= 0x01000101) {
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
@@ -988,6 +1006,14 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAh(push, screen->txc->offset);
PUSH_DATA (push, screen->txc->offset);
PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
+ if (screen->eng3d->oclass >= GM107_3D_CLASS) {
+ screen->tic.maxwell = true;
+ if (screen->eng3d->oclass == GM107_3D_CLASS) {
+ screen->tic.maxwell =
+ debug_get_bool_option("NOUVEAU_MAXWELL_TIC", true);
+ IMMED_NVC0(push, SUBC_3D(0x0f10), screen->tic.maxwell);
+ }
+ }
BEGIN_NVC0(push, NVC0_3D(TSC_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->txc->offset + 65536);
@@ -1051,6 +1077,7 @@ nvc0_screen_create(struct nouveau_device *dev)
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
+ MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 1a56177815c..8487abcf999 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -51,8 +51,9 @@ struct nvc0_graph_state {
uint8_t c14_bound; /* whether immediate array constbuf is bound */
uint8_t clip_enable;
uint32_t clip_mode;
- uint32_t uniform_buffer_bound[5];
+ uint32_t uniform_buffer_bound[6];
struct nvc0_transform_feedback_state *tfb;
+ bool seamless_cube_map;
};
struct nvc0_screen {
@@ -83,6 +84,7 @@ struct nvc0_screen {
void **entries;
int next;
uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
+ bool maxwell;
} tic;
struct {
@@ -164,12 +166,27 @@ nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
struct nvc0_format {
uint32_t rt;
- uint32_t tic;
+ struct {
+ unsigned format:7;
+ unsigned type_r:3;
+ unsigned type_g:3;
+ unsigned type_b:3;
+ unsigned type_a:3;
+ unsigned src_x:3;
+ unsigned src_y:3;
+ unsigned src_z:3;
+ unsigned src_w:3;
+ } tic;
+ uint32_t usage;
+};
+
+struct nvc0_vertex_format {
uint32_t vtx;
uint32_t usage;
};
extern const struct nvc0_format nvc0_format_table[];
+extern const struct nvc0_vertex_format nvc0_vertex_format[];
static inline void
nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 382a18ef153..2f46c436a4c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -37,11 +37,11 @@ nvc0_program_update_context_state(struct nvc0_context *nvc0,
if (prog && prog->need_tls) {
const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR;
if (!nvc0->state.tls_required)
- BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls);
nvc0->state.tls_required |= 1 << stage;
} else {
if (nvc0->state.tls_required == (1 << stage))
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS);
nvc0->state.tls_required &= ~(1 << stage);
}
@@ -152,7 +152,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
NVC0_3D_SHADE_MODEL_SMOOTH);
}
- if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) {
+ if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) {
return;
}
@@ -292,9 +292,9 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
}
nvc0->state.tfb = tfb;
- if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS))
+ if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS))
return;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TFB);
for (b = 0; b < nvc0->num_tfbbufs; ++b) {
struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
@@ -310,7 +310,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
buf = nv04_resource(targ->pipe.buffer);
- BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR);
if (!(nvc0->tfbbuf_dirty & (1 << b)))
continue;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index cf3d3497c78..7ccce9ff6bf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -21,6 +21,7 @@
*/
#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
@@ -32,7 +33,6 @@
#include "nvc0/nvc0_query_hw.h"
#include "nvc0/nvc0_3d.xml.h"
-#include "nv50/nv50_texture.xml.h"
#include "nouveau_gldefs.h"
@@ -186,7 +186,7 @@ nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->blend = hwcso;
- nvc0->dirty |= NVC0_NEW_BLEND;
+ nvc0->dirty_3d |= NVC0_NEW_3D_BLEND;
}
static void
@@ -315,7 +315,7 @@ nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->rast = hwcso;
- nvc0->dirty |= NVC0_NEW_RASTERIZER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_RASTERIZER;
}
static void
@@ -393,7 +393,7 @@ nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->zsa = hwcso;
- nvc0->dirty |= NVC0_NEW_ZSA;
+ nvc0->dirty_3d |= NVC0_NEW_3D_ZSA;
}
static void
@@ -449,7 +449,7 @@ nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
nvc0->num_samplers[s] = nr;
- nvc0->dirty |= NVC0_NEW_SAMPLERS;
+ nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
}
static void
@@ -566,7 +566,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
}
if (old) {
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
nvc0_screen_tic_unlock(nvc0->screen, old);
}
@@ -576,7 +576,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
for (i = nr; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
if (old) {
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
nvc0_screen_tic_unlock(nvc0->screen, old);
pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
}
@@ -584,7 +584,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
nvc0->num_textures[s] = nr;
- nvc0->dirty |= NVC0_NEW_TEXTURES;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
}
static void
@@ -594,7 +594,7 @@ nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s,
{
struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d;
const unsigned end = start + nr;
- const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0);
+ const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_3D_TEX(s, 0);
int last_valid = -1;
unsigned i;
@@ -733,7 +733,7 @@ nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->vertprog = hwcso;
- nvc0->dirty |= NVC0_NEW_VERTPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_VERTPROG;
}
static void *
@@ -749,7 +749,7 @@ nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->fragprog = hwcso;
- nvc0->dirty |= NVC0_NEW_FRAGPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAGPROG;
}
static void *
@@ -765,7 +765,7 @@ nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->gmtyprog = hwcso;
- nvc0->dirty |= NVC0_NEW_GMTYPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_GMTYPROG;
}
static void *
@@ -781,7 +781,7 @@ nvc0_tcp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->tctlprog = hwcso;
- nvc0->dirty |= NVC0_NEW_TCTLPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TCTLPROG;
}
static void *
@@ -797,7 +797,7 @@ nvc0_tep_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->tevlprog = hwcso;
- nvc0->dirty |= NVC0_NEW_TEVLPROG;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TEVLPROG;
}
static void *
@@ -839,7 +839,9 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
const unsigned i = index;
if (unlikely(shader == PIPE_SHADER_COMPUTE)) {
- assert(!cb || !cb->user_buffer);
+ if (nvc0->constbuf[s][i].user)
+ nvc0->constbuf[s][i].u.buf = NULL;
+ else
if (nvc0->constbuf[s][i].u.buf)
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
@@ -849,9 +851,9 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
nvc0->constbuf[s][i].u.buf = NULL;
else
if (nvc0->constbuf[s][i].u.buf)
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_CB(s, i));
- nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
}
nvc0->constbuf_dirty[s] |= 1 << i;
@@ -891,7 +893,7 @@ nvc0_set_blend_color(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->blend_colour = *bcol;
- nvc0->dirty |= NVC0_NEW_BLEND_COLOUR;
+ nvc0->dirty_3d |= NVC0_NEW_3D_BLEND_COLOUR;
}
static void
@@ -901,7 +903,7 @@ nvc0_set_stencil_ref(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->stencil_ref = *sr;
- nvc0->dirty |= NVC0_NEW_STENCIL_REF;
+ nvc0->dirty_3d |= NVC0_NEW_3D_STENCIL_REF;
}
static void
@@ -912,7 +914,7 @@ nvc0_set_clip_state(struct pipe_context *pipe,
memcpy(nvc0->clip.ucp, clip->ucp, sizeof(clip->ucp));
- nvc0->dirty |= NVC0_NEW_CLIP;
+ nvc0->dirty_3d |= NVC0_NEW_3D_CLIP;
}
static void
@@ -921,7 +923,7 @@ nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->sample_mask = sample_mask;
- nvc0->dirty |= NVC0_NEW_SAMPLE_MASK;
+ nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLE_MASK;
}
static void
@@ -931,7 +933,7 @@ nvc0_set_min_samples(struct pipe_context *pipe, unsigned min_samples)
if (nvc0->min_samples != min_samples) {
nvc0->min_samples = min_samples;
- nvc0->dirty |= NVC0_NEW_MIN_SAMPLES;
+ nvc0->dirty_3d |= NVC0_NEW_3D_MIN_SAMPLES;
}
}
@@ -940,23 +942,12 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe,
const struct pipe_framebuffer_state *fb)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
- unsigned i;
-
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
- for (i = 0; i < fb->nr_cbufs; ++i)
- pipe_surface_reference(&nvc0->framebuffer.cbufs[i], fb->cbufs[i]);
- for (; i < nvc0->framebuffer.nr_cbufs; ++i)
- pipe_surface_reference(&nvc0->framebuffer.cbufs[i], NULL);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
- nvc0->framebuffer.nr_cbufs = fb->nr_cbufs;
+ util_copy_framebuffer_state(&nvc0->framebuffer, fb);
- nvc0->framebuffer.width = fb->width;
- nvc0->framebuffer.height = fb->height;
-
- pipe_surface_reference(&nvc0->framebuffer.zsbuf, fb->zsbuf);
-
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
}
static void
@@ -966,7 +957,7 @@ nvc0_set_polygon_stipple(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->stipple = *stipple;
- nvc0->dirty |= NVC0_NEW_STIPPLE;
+ nvc0->dirty_3d |= NVC0_NEW_3D_STIPPLE;
}
static void
@@ -984,7 +975,7 @@ nvc0_set_scissor_states(struct pipe_context *pipe,
continue;
nvc0->scissors[start_slot + i] = scissor[i];
nvc0->scissors_dirty |= 1 << (start_slot + i);
- nvc0->dirty |= NVC0_NEW_SCISSOR;
+ nvc0->dirty_3d |= NVC0_NEW_3D_SCISSOR;
}
}
@@ -1003,7 +994,7 @@ nvc0_set_viewport_states(struct pipe_context *pipe,
continue;
nvc0->viewports[start_slot + i] = vpt[i];
nvc0->viewports_dirty |= 1 << (start_slot + i);
- nvc0->dirty |= NVC0_NEW_VIEWPORT;
+ nvc0->dirty_3d |= NVC0_NEW_3D_VIEWPORT;
}
}
@@ -1017,7 +1008,7 @@ nvc0_set_tess_state(struct pipe_context *pipe,
memcpy(nvc0->default_tess_outer, default_tess_outer, 4 * sizeof(float));
memcpy(nvc0->default_tess_inner, default_tess_inner, 2 * sizeof(float));
- nvc0->dirty |= NVC0_NEW_TESSFACTOR;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TESSFACTOR;
}
static void
@@ -1028,8 +1019,8 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
unsigned i;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
- nvc0->dirty |= NVC0_NEW_ARRAYS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX);
+ nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS;
util_set_vertex_buffers_count(nvc0->vtxbuf, &nvc0->num_vtxbufs, vb,
start_slot, count);
@@ -1071,20 +1062,20 @@ nvc0_set_index_buffer(struct pipe_context *pipe,
struct nvc0_context *nvc0 = nvc0_context(pipe);
if (nvc0->idxbuf.buffer)
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX);
if (ib) {
pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer);
nvc0->idxbuf.index_size = ib->index_size;
if (ib->buffer) {
nvc0->idxbuf.offset = ib->offset;
- nvc0->dirty |= NVC0_NEW_IDXBUF;
+ nvc0->dirty_3d |= NVC0_NEW_3D_IDXBUF;
} else {
nvc0->idxbuf.user_buffer = ib->user_buffer;
- nvc0->dirty &= ~NVC0_NEW_IDXBUF;
+ nvc0->dirty_3d &= ~NVC0_NEW_3D_IDXBUF;
}
} else {
- nvc0->dirty &= ~NVC0_NEW_IDXBUF;
+ nvc0->dirty_3d &= ~NVC0_NEW_3D_IDXBUF;
pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
}
}
@@ -1095,7 +1086,7 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
struct nvc0_context *nvc0 = nvc0_context(pipe);
nvc0->vertex = hwcso;
- nvc0->dirty |= NVC0_NEW_VERTEX;
+ nvc0->dirty_3d |= NVC0_NEW_3D_VERTEX;
}
static struct pipe_stream_output_target *
@@ -1194,7 +1185,7 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
nvc0->num_tfbbufs = num_targets;
if (nvc0->tfbbuf_dirty)
- nvc0->dirty |= NVC0_NEW_TFB_TARGETS;
+ nvc0->dirty_3d |= NVC0_NEW_3D_TFB_TARGETS;
}
static void
@@ -1223,7 +1214,7 @@ nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t,
nvc0->surfaces_dirty[t] |= mask;
if (t == 0)
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
else
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
}
@@ -1241,7 +1232,7 @@ nvc0_set_compute_resources(struct pipe_context *pipe,
static void
nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
unsigned start_slot, unsigned count,
- struct pipe_image_view **views)
+ struct pipe_image_view *views)
{
}
@@ -1254,7 +1245,7 @@ nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t,
const unsigned mask = ((1 << nr) - 1) << start;
unsigned i;
- assert(t < 5);
+ assert(t < 6);
if (pbuffers) {
for (i = start; i < end; ++i) {
@@ -1274,7 +1265,11 @@ nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t,
}
nvc0->buffers_dirty[t] |= mask;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
+ if (t == 5)
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BUF);
+ else
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BUF);
+
}
static void
@@ -1286,7 +1281,10 @@ nvc0_set_shader_buffers(struct pipe_context *pipe,
const unsigned s = nvc0_shader_stage(shader);
nvc0_bind_buffers_range(nvc0_context(pipe), s, start, nr, buffers);
- nvc0_context(pipe)->dirty |= NVC0_NEW_BUFFERS;
+ if (s == 5)
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_BUFFERS;
+ else
+ nvc0_context(pipe)->dirty_3d |= NVC0_NEW_3D_BUFFERS;
}
static inline void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index e0d8ab01776..18e79e36b85 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -3,7 +3,6 @@
#include "util/u_math.h"
#include "nvc0/nvc0_context.h"
-#include "nv50/nv50_defs.xml.h"
#if 0
static void
@@ -77,7 +76,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
bool serialize = false;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
@@ -142,7 +141,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
/* only register for writing, otherwise we'd always serialize here */
- BCTX_REFN(nvc0->bufctx_3d, FB, res, WR);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR);
}
if (fb->zsbuf) {
@@ -173,7 +172,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
- BCTX_REFN(nvc0->bufctx_3d, FB, &mt->base, WR);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR);
} else {
BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -184,8 +183,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
ms = 1 << ms_mode;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
PUSH_DATA (push, 256 + 128);
for (i = 0; i < ms; i++) {
@@ -240,7 +239,7 @@ nvc0_validate_scissor(struct nvc0_context *nvc0)
int i;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- if (!(nvc0->dirty & NVC0_NEW_SCISSOR) &&
+ if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) &&
nvc0->rast->pipe.scissor == nvc0->state.scissor)
return;
@@ -318,8 +317,8 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10));
- PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10));
+ PUSH_DATAh(push, bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATA (push, bo->offset + (6 << 16) + (s << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
PUSH_DATA (push, 256);
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
@@ -368,7 +367,7 @@ nvc0_validate_clip(struct nvc0_context *nvc0)
if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
nvc0_check_program_ucps(nvc0, vp, clip_enable);
- if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage)))
+ if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage)))
if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
nvc0_upload_uclip_planes(nvc0, stage);
@@ -455,7 +454,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
PUSH_DATA (push, (i << 4) | 1);
- BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);
nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
res->cb_bindings[s] |= 1 << i;
@@ -468,6 +467,11 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
}
}
}
+
+ /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
+ nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
+ nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
+ nvc0->state.uniform_buffer_bound[5] = 0;
}
static void
@@ -479,8 +483,8 @@ nvc0_validate_buffers(struct nvc0_context *nvc0)
for (s = 0; s < 5; s++) {
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10));
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
PUSH_DATA (push, 512);
for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
@@ -491,7 +495,7 @@ nvc0_validate_buffers(struct nvc0_context *nvc0)
PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
PUSH_DATA (push, 0);
- BCTX_REFN(nvc0->bufctx_3d, BUF, res, RDWR);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR);
} else {
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
@@ -536,6 +540,25 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0)
IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples);
}
+static void
+nvc0_validate_driverconst(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+ int i;
+
+ for (i = 0; i < 5; ++i) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
+ PUSH_DATA (push, (15 << 4) | 1);
+ }
+
+ nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
+}
+
void
nvc0_validate_global_residents(struct nvc0_context *nvc0,
struct nouveau_bufctx *bctx, int bin)
@@ -629,35 +652,37 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
else
ctx_to->state = ctx_to->screen->save_state;
- ctx_to->dirty = ~0;
+ ctx_to->dirty_3d = ~0;
+ ctx_to->dirty_cp = ~0;
ctx_to->viewports_dirty = ~0;
ctx_to->scissors_dirty = ~0;
- for (s = 0; s < 5; ++s) {
+ for (s = 0; s < 6; ++s) {
ctx_to->samplers_dirty[s] = ~0;
ctx_to->textures_dirty[s] = ~0;
ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
+ ctx_to->buffers_dirty[s] = ~0;
}
/* Reset tfb as the shader that owns it may have been deleted. */
ctx_to->state.tfb = NULL;
if (!ctx_to->vertex)
- ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
+ ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);
if (!ctx_to->idxbuf.buffer)
- ctx_to->dirty &= ~NVC0_NEW_IDXBUF;
+ ctx_to->dirty_3d &= ~NVC0_NEW_3D_IDXBUF;
if (!ctx_to->vertprog)
- ctx_to->dirty &= ~NVC0_NEW_VERTPROG;
+ ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG;
if (!ctx_to->fragprog)
- ctx_to->dirty &= ~NVC0_NEW_FRAGPROG;
+ ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG;
if (!ctx_to->blend)
- ctx_to->dirty &= ~NVC0_NEW_BLEND;
+ ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND;
if (!ctx_to->rast)
- ctx_to->dirty &= ~(NVC0_NEW_RASTERIZER | NVC0_NEW_SCISSOR);
+ ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR);
if (!ctx_to->zsa)
- ctx_to->dirty &= ~NVC0_NEW_ZSA;
+ ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA;
ctx_to->screen->cur_ctx = ctx_to;
}
@@ -666,40 +691,41 @@ static struct state_validate {
void (*func)(struct nvc0_context *);
uint32_t states;
} validate_list[] = {
- { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER },
- { nvc0_validate_blend, NVC0_NEW_BLEND },
- { nvc0_validate_zsa, NVC0_NEW_ZSA },
- { nvc0_validate_sample_mask, NVC0_NEW_SAMPLE_MASK },
- { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER },
- { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR },
- { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF },
- { nvc0_validate_stipple, NVC0_NEW_STIPPLE },
- { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER },
- { nvc0_validate_viewport, NVC0_NEW_VIEWPORT },
- { nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
- { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG },
- { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
- { nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR },
- { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
- { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER },
- { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
- NVC0_NEW_RASTERIZER },
- { nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
- { nvc0_validate_derived_3, NVC0_NEW_BLEND | NVC0_NEW_FRAMEBUFFER },
- { nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
- NVC0_NEW_VERTPROG |
- NVC0_NEW_TEVLPROG |
- NVC0_NEW_GMTYPROG },
- { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
- { nvc0_validate_textures, NVC0_NEW_TEXTURES },
- { nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
- { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
- { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
- { nvc0_validate_surfaces, NVC0_NEW_SURFACES },
- { nvc0_validate_buffers, NVC0_NEW_BUFFERS },
- { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
- { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG },
- { nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES },
+ { nvc0_validate_fb, NVC0_NEW_3D_FRAMEBUFFER },
+ { nvc0_validate_blend, NVC0_NEW_3D_BLEND },
+ { nvc0_validate_zsa, NVC0_NEW_3D_ZSA },
+ { nvc0_validate_sample_mask, NVC0_NEW_3D_SAMPLE_MASK },
+ { nvc0_validate_rasterizer, NVC0_NEW_3D_RASTERIZER },
+ { nvc0_validate_blend_colour, NVC0_NEW_3D_BLEND_COLOUR },
+ { nvc0_validate_stencil_ref, NVC0_NEW_3D_STENCIL_REF },
+ { nvc0_validate_stipple, NVC0_NEW_3D_STIPPLE },
+ { nvc0_validate_scissor, NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER },
+ { nvc0_validate_viewport, NVC0_NEW_3D_VIEWPORT },
+ { nvc0_vertprog_validate, NVC0_NEW_3D_VERTPROG },
+ { nvc0_tctlprog_validate, NVC0_NEW_3D_TCTLPROG },
+ { nvc0_tevlprog_validate, NVC0_NEW_3D_TEVLPROG },
+ { nvc0_validate_tess_state, NVC0_NEW_3D_TESSFACTOR },
+ { nvc0_gmtyprog_validate, NVC0_NEW_3D_GMTYPROG },
+ { nvc0_fragprog_validate, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER },
+ { nvc0_validate_derived_1, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA |
+ NVC0_NEW_3D_RASTERIZER },
+ { nvc0_validate_derived_2, NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER },
+ { nvc0_validate_derived_3, NVC0_NEW_3D_BLEND | NVC0_NEW_3D_FRAMEBUFFER },
+ { nvc0_validate_clip, NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER |
+ NVC0_NEW_3D_VERTPROG |
+ NVC0_NEW_3D_TEVLPROG |
+ NVC0_NEW_3D_GMTYPROG },
+ { nvc0_constbufs_validate, NVC0_NEW_3D_CONSTBUF },
+ { nvc0_validate_textures, NVC0_NEW_3D_TEXTURES },
+ { nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS },
+ { nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
+ { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
+ { nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES },
+ { nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS },
+ { nvc0_idxbuf_validate, NVC0_NEW_3D_IDXBUF },
+ { nvc0_tfb_validate, NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG },
+ { nvc0_validate_min_samples, NVC0_NEW_3D_MIN_SAMPLES },
+ { nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST },
};
bool
@@ -712,7 +738,7 @@ nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask)
if (nvc0->screen->cur_ctx != nvc0)
nvc0_switch_pipe_context(nvc0);
- state_mask = nvc0->dirty & mask;
+ state_mask = nvc0->dirty_3d & mask;
if (state_mask) {
for (i = 0; i < ARRAY_SIZE(validate_list); ++i) {
@@ -721,7 +747,7 @@ nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask)
if (state_mask & validate->states)
validate->func(nvc0);
}
- nvc0->dirty &= ~state_mask;
+ nvc0->dirty_3d &= ~state_mask;
nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, false);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index e3843ca1bf1..49577969d3d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -34,8 +34,8 @@
#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_resource.h"
-#include "nv50/nv50_defs.xml.h"
-#include "nv50/nv50_texture.xml.h"
+#include "nv50/g80_defs.xml.h"
+#include "nv50/g80_texture.xml.h"
/* these are used in nv50_blit.h */
#define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
@@ -54,7 +54,7 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
/* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */
if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal)
- return NV50_SURFACE_FORMAT_A8_UNORM;
+ return G80_SURFACE_FORMAT_A8_UNORM;
/* Hardware values for color formats range from 0xc0 to 0xff,
* but the 2D engine doesn't support all of them.
@@ -65,15 +65,15 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
switch (util_format_get_blocksize(format)) {
case 1:
- return NV50_SURFACE_FORMAT_R8_UNORM;
+ return G80_SURFACE_FORMAT_R8_UNORM;
case 2:
- return NV50_SURFACE_FORMAT_RG8_UNORM;
+ return G80_SURFACE_FORMAT_RG8_UNORM;
case 4:
- return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+ return G80_SURFACE_FORMAT_BGRA8_UNORM;
case 8:
- return NV50_SURFACE_FORMAT_RGBA16_UNORM;
+ return G80_SURFACE_FORMAT_RGBA16_UNORM;
case 16:
- return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
+ return G80_SURFACE_FORMAT_RGBA32_FLOAT;
default:
assert(0);
return 0;
@@ -353,7 +353,7 @@ nvc0_clear_render_target(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
}
static void
@@ -609,7 +609,7 @@ nvc0_clear_buffer(struct pipe_context *pipe,
data, data_size);
}
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
}
static void
@@ -678,7 +678,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
}
void
@@ -693,7 +693,7 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers,
uint32_t mode = 0;
/* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
- if (!nvc0_state_validate(nvc0, NVC0_NEW_FRAMEBUFFER))
+ if (!nvc0_state_validate(nvc0, NVC0_NEW_3D_FRAMEBUFFER))
return;
if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
@@ -793,7 +793,7 @@ struct nvc0_blitctx
struct pipe_sampler_view *texture[2];
struct nv50_tsc_entry *sampler[2];
unsigned min_samples;
- uint32_t dirty;
+ uint32_t dirty_3d;
} saved;
struct nvc0_rasterizer_stateobj rast;
};
@@ -871,12 +871,14 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit)
blit->sampler[0].id = -1;
- blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) |
- (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT);
+ blit->sampler[0].tsc[0] = G80_TSC_0_SRGB_CONVERSION |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_U__SHIFT) |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_V__SHIFT) |
+ (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_P__SHIFT);
blit->sampler[0].tsc[1] =
- NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE;
+ G80_TSC_1_MAG_FILTER_NEAREST |
+ G80_TSC_1_MIN_FILTER_NEAREST |
+ G80_TSC_1_MIP_FILTER_NONE;
/* clamp to edge, min/max lod = 0, bilinear filtering */
@@ -884,7 +886,9 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit)
blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0];
blit->sampler[1].tsc[1] =
- NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE;
+ G80_TSC_1_MAG_FILTER_LINEAR |
+ G80_TSC_1_MIN_FILTER_LINEAR |
+ G80_TSC_1_MIP_FILTER_NONE;
}
static void
@@ -1081,19 +1085,19 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx)
nvc0->min_samples = 1;
- ctx->saved.dirty = nvc0->dirty;
+ ctx->saved.dirty_3d = nvc0->dirty_3d;
nvc0->textures_dirty[4] |= 3;
nvc0->samplers_dirty[4] |= 3;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1));
- nvc0->dirty = NVC0_NEW_FRAMEBUFFER | NVC0_NEW_MIN_SAMPLES |
- NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
- NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
- NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS;
+ nvc0->dirty_3d = NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_MIN_SAMPLES |
+ NVC0_NEW_3D_VERTPROG | NVC0_NEW_3D_FRAGPROG |
+ NVC0_NEW_3D_TCTLPROG | NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG |
+ NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS;
}
static void
@@ -1141,20 +1145,20 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
nvc0->cond_cond, nvc0->cond_mode);
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1));
nouveau_scratch_done(&nvc0->base);
- nvc0->dirty = blit->saved.dirty |
- (NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK |
- NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND |
- NVC0_NEW_VIEWPORT |
- NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS |
- NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
- NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
- NVC0_NEW_TFB_TARGETS | NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
+ nvc0->dirty_3d = blit->saved.dirty_3d |
+ (NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_SAMPLE_MASK |
+ NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_ZSA | NVC0_NEW_3D_BLEND |
+ NVC0_NEW_3D_VIEWPORT |
+ NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS |
+ NVC0_NEW_3D_VERTPROG | NVC0_NEW_3D_FRAGPROG |
+ NVC0_NEW_3D_TCTLPROG | NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG |
+ NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);
nvc0->scissors_dirty |= 1;
nvc0->viewports_dirty |= 1;
@@ -1263,7 +1267,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
return;
}
- BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP,
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo);
nouveau_pushbuf_validate(push);
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 4);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 7223f5aecfb..53332400a4f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -22,35 +22,29 @@
#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_resource.h"
-#include "nv50/nv50_texture.xml.h"
-#include "nv50/nv50_defs.xml.h"
+#include "nvc0/gm107_texture.xml.h"
+#include "nvc0/nvc0_compute.xml.h"
+#include "nv50/g80_texture.xml.h"
+#include "nv50/g80_defs.xml.h"
#include "util/u_format.h"
#define NVE4_TIC_ENTRY_INVALID 0x000fffff
#define NVE4_TSC_ENTRY_INVALID 0xfff00000
-#define NV50_TIC_0_SWIZZLE__MASK \
- (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
- NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
-
static inline uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int)
+nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
{
switch (swz) {
- case PIPE_SWIZZLE_RED:
- return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
- case PIPE_SWIZZLE_GREEN:
- return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
- case PIPE_SWIZZLE_BLUE:
- return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
- case PIPE_SWIZZLE_ALPHA:
- return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
+ case PIPE_SWIZZLE_RED : return fmt->tic.src_x;
+ case PIPE_SWIZZLE_GREEN: return fmt->tic.src_y;
+ case PIPE_SWIZZLE_BLUE : return fmt->tic.src_z;
+ case PIPE_SWIZZLE_ALPHA: return fmt->tic.src_w;
case PIPE_SWIZZLE_ONE:
- return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT;
+ return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
case PIPE_SWIZZLE_ZERO:
default:
- return NV50_TIC_MAP_ZERO;
+ return G80_TIC_SOURCE_ZERO;
}
}
@@ -67,14 +61,15 @@ nvc0_create_sampler_view(struct pipe_context *pipe,
return nvc0_create_texture_view(pipe, res, templ, flags, templ->target);
}
-struct pipe_sampler_view *
-nvc0_create_texture_view(struct pipe_context *pipe,
- struct pipe_resource *texture,
- const struct pipe_sampler_view *templ,
- uint32_t flags,
- enum pipe_texture_target target)
+static struct pipe_sampler_view *
+gm107_create_texture_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ,
+ uint32_t flags,
+ enum pipe_texture_target target)
{
const struct util_format_description *desc;
+ const struct nvc0_format *fmt;
uint64_t address;
uint32_t *tic;
uint32_t swz[4];
@@ -101,45 +96,224 @@ nvc0_create_texture_view(struct pipe_context *pipe,
tic = &view->tic[0];
desc = util_format_description(view->pipe.format);
+ tex_int = util_format_is_pure_integer(view->pipe.format);
+
+ fmt = &nvc0_format_table[view->pipe.format];
+ swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
+
+ tic[0] = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
+ tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
+ tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
+ tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
+ tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
+ tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
+ tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
+ tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
+ tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;
+
+ address = mt->base.address;
+
+ tic[3] = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
+ tic[4] = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
+ tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;
+
+ if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
+ tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
+ else
+ tic[5] = 0;
+
+ /* check for linear storage type */
+ if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
+ if (texture->target == PIPE_BUFFER) {
+ assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
+ width = view->pipe.u.buf.last_element - view->pipe.u.buf.first_element;
+ address +=
+ view->pipe.u.buf.first_element * desc->block.bits / 8;
+ tic[2] = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
+ tic[3] |= width >> 16;
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
+ tic[4] |= width & 0xffff;
+ } else {
+ assert(!(mt->level[0].pitch & 0x1f));
+ /* must be 2D texture without mip maps */
+ tic[2] = GM107_TIC2_2_HEADER_VERSION_PITCH;
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
+ tic[3] |= mt->level[0].pitch >> 5;
+ tic[4] |= mt->base.base.width0 - 1;
+ tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
+ tic[5] |= mt->base.base.height0 - 1;
+ }
+ tic[1] = address;
+ tic[2] |= address >> 32;
+ tic[6] = 0;
+ tic[7] = 0;
+ return &view->pipe;
+ }
+
+ tic[2] = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
+ tic[3] |=
+ ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
+ ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);
+
+ depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
- tic[0] = nvc0_format_table[view->pipe.format].tic;
+ if (mt->base.base.array_size > 1) {
+ /* there doesn't seem to be a base layer field in TIC */
+ address += view->pipe.u.tex.first_layer * mt->layer_stride;
+ depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
+ }
+ tic[1] = address;
+ tic[2] |= address >> 32;
+
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
+ break;
+ case PIPE_TEXTURE_2D:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
+ break;
+ case PIPE_TEXTURE_RECT:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
+ break;
+ case PIPE_TEXTURE_3D:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ depth /= 6;
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
+ break;
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ depth /= 6;
+ tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
+ break;
+ default:
+ unreachable("unexpected/invalid texture target");
+ }
+
+ tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
+ GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
+ GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
+ GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;
+
+ if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
+ width = mt->base.base.width0 << mt->ms_x;
+ height = mt->base.base.height0 << mt->ms_y;
+ } else {
+ width = mt->base.base.width0;
+ height = mt->base.base.height0;
+ }
+
+ tic[4] |= width - 1;
+
+ tic[5] |= (height - 1) & 0xffff;
+ tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
+ tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;
+
+ /* sampling points: (?) */
+ if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
+ tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
+ tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
+ } else {
+ tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
+ tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
+ }
+
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+ tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;
+
+ return &view->pipe;
+}
+
+static struct pipe_sampler_view *
+gf100_create_texture_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ,
+ uint32_t flags,
+ enum pipe_texture_target target)
+{
+ const struct util_format_description *desc;
+ const struct nvc0_format *fmt;
+ uint64_t address;
+ uint32_t *tic;
+ uint32_t swz[4];
+ uint32_t width, height;
+ uint32_t depth;
+ struct nv50_tic_entry *view;
+ struct nv50_miptree *mt;
+ bool tex_int;
+
+ view = MALLOC_STRUCT(nv50_tic_entry);
+ if (!view)
+ return NULL;
+ mt = nv50_miptree(texture);
+
+ view->pipe = *templ;
+ view->pipe.reference.count = 1;
+ view->pipe.texture = NULL;
+ view->pipe.context = pipe;
+
+ view->id = -1;
+
+ pipe_resource_reference(&view->pipe.texture, texture);
+
+ tic = &view->tic[0];
+
+ desc = util_format_description(view->pipe.format);
+
+ fmt = &nvc0_format_table[view->pipe.format];
tex_int = util_format_is_pure_integer(view->pipe.format);
- swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
- swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
- swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
- swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
- tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
- (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
- (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
- (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
- (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+ swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
+ tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
+ (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
+ (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
+ (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
+ (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
+ (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
+ (swz[3] << G80_TIC_0_W_SOURCE__SHIFT);
address = mt->base.address;
- tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER;
+ tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+ tic[2] |= G80_TIC_2_SRGB_CONVERSION;
if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
- tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+ tic[2] |= G80_TIC_2_NORMALIZED_COORDS;
/* check for linear storage type */
if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
if (texture->target == PIPE_BUFFER) {
- assert(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS));
+ assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
address +=
view->pipe.u.buf.first_element * desc->block.bits / 8;
- tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER;
+ tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
tic[3] = 0;
tic[4] = /* width */
view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1;
tic[5] = 0;
} else {
/* must be 2D texture without mip maps */
- tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT;
+ tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
tic[3] = mt->level[0].pitch;
tic[4] = mt->base.base.width0;
tic[5] = (1 << 16) | mt->base.base.height0;
@@ -167,30 +341,30 @@ nvc0_create_texture_view(struct pipe_context *pipe,
switch (target) {
case PIPE_TEXTURE_1D:
- tic[2] |= NV50_TIC_2_TARGET_1D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
break;
case PIPE_TEXTURE_2D:
- tic[2] |= NV50_TIC_2_TARGET_2D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
break;
case PIPE_TEXTURE_RECT:
- tic[2] |= NV50_TIC_2_TARGET_2D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
break;
case PIPE_TEXTURE_3D:
- tic[2] |= NV50_TIC_2_TARGET_3D;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
break;
case PIPE_TEXTURE_CUBE:
depth /= 6;
- tic[2] |= NV50_TIC_2_TARGET_CUBE;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
break;
case PIPE_TEXTURE_1D_ARRAY:
- tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
break;
case PIPE_TEXTURE_2D_ARRAY:
- tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
break;
case PIPE_TEXTURE_CUBE_ARRAY:
depth /= 6;
- tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+ tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
break;
default:
unreachable("unexpected/invalid texture target");
@@ -224,6 +398,18 @@ nvc0_create_texture_view(struct pipe_context *pipe,
return &view->pipe;
}
+struct pipe_sampler_view *
+nvc0_create_texture_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ,
+ uint32_t flags,
+ enum pipe_texture_target target)
+{
+ if (nvc0_context(pipe)->screen->tic.maxwell)
+ return gm107_create_texture_view(pipe, texture, templ, flags, target);
+ return gf100_create_texture_view(pipe, texture, templ, flags, target);
+}
+
static void
nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
struct nv04_resource *res)
@@ -244,7 +430,7 @@ nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
tic->tic[2] |= address >> 32;
}
-static bool
+bool
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
{
uint32_t commands[32];
@@ -285,7 +471,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
need_flush = true;
} else
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
- BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+ if (unlikely(s == 5))
+ BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
+ else
+ BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
PUSH_DATA (push, (tic->id << 4) | 1);
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
}
@@ -298,7 +487,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
continue;
commands[n++] = (tic->id << 9) | (i << 1) | 1;
- BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
+ if (unlikely(s == 5))
+ BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
+ else
+ BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
}
for (; i < nvc0->state.num_textures[s]; ++i)
commands[n++] = (i << 1) | 0;
@@ -306,7 +498,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
nvc0->state.num_textures[s] = nvc0->num_textures[s];
if (n) {
- BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
+ if (unlikely(s == 5))
+ BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
+ else
+ BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
PUSH_DATAp(push, commands, n);
}
nvc0->textures_dirty[s] = 0;
@@ -362,7 +557,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
nvc0->tex_handles[s][i] |= tic->id;
if (dirty)
- BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
}
for (; i < nvc0->state.num_textures[s]; ++i) {
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
@@ -392,7 +587,7 @@ void nvc0_validate_textures(struct nvc0_context *nvc0)
}
}
-static bool
+bool
nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
{
uint32_t commands[16];
@@ -410,6 +605,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
commands[n++] = (i << 4) | 0;
continue;
}
+ nvc0->seamless_cube_map = tsc->seamless_cube_map;
if (tsc->id < 0) {
tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
@@ -428,7 +624,10 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
if (n) {
- BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
+ if (unlikely(s == 5))
+ BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
+ else
+ BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
PUSH_DATAp(push, commands, n);
}
nvc0->samplers_dirty[s] = 0;
@@ -513,7 +712,7 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
return;
- address = nvc0->screen->uniform_bo->offset + (5 << 16);
+ address = nvc0->screen->uniform_bo->offset + (6 << 16);
for (s = 0; s < 5; ++s, address += (1 << 10)) {
uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
@@ -686,45 +885,45 @@ nvc0_validate_surfaces(struct nvc0_context *nvc0)
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
{
- [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT,
- [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT,
- [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT,
- [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT,
- [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM,
- [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM,
- [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT,
- [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT,
- [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM,
- [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM,
- [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT,
- [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT,
- [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT,
- [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM,
-/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */
- [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT,
- [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT,
- [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT,
- [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT,
- [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM,
- [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM,
- [PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT,
- [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT,
- [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM,
- [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM,
- [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT,
- [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT,
- [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT,
- [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT,
- [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT,
- [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT,
- [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM,
- [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM,
- [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT,
- [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT,
- [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM,
- [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM,
- [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT,
- [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT,
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT, */
+ [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
+ [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
+ [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
+ [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
+ [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
+ [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
+ [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
+ [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
+ [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
+ [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
+ [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
+ [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
+ [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
+ [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
+ [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
+ [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
+ [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
+ [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
+ [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
+ [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
+ [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
+ [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
};
/* Auxiliary format description values for surface instructions.
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index 279c7e93cc8..24d23d29bbf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -3,8 +3,6 @@
#include "nvc0/nvc0_context.h"
-#include "nv50/nv50_defs.xml.h"
-
struct nvc0_transfer {
struct pipe_transfer base;
struct nv50_m2mf_rect rect[2];
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 032b3c125cf..647aa10ec35 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -80,7 +80,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
enum pipe_format fmt = ve->src_format;
so->element[i].pipe = elements[i];
- so->element[i].state = nvc0_format_table[fmt].vtx;
+ so->element[i].state = nvc0_vertex_format[fmt].vtx;
if (!so->element[i].state) {
switch (util_format_get_nr_components(fmt)) {
@@ -93,7 +93,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
FREE(so);
return NULL;
}
- so->element[i].state = nvc0_format_table[fmt].vtx;
+ so->element[i].state = nvc0_vertex_format[fmt].vtx;
so->need_conversion = true;
pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,
"Converting vertex element %d, no hw format %s",
@@ -222,7 +222,7 @@ static inline void
nvc0_release_user_vbufs(struct nvc0_context *nvc0)
{
if (nvc0->vbo_user) {
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
nouveau_scratch_done(&nvc0->base);
}
}
@@ -257,7 +257,7 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0)
address[b] = nouveau_scratch_data(&nvc0->base, vb->user_buffer,
base, size, &bo);
if (bo)
- BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, bo_flags, bo);
NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size);
}
@@ -292,7 +292,7 @@ nvc0_update_user_vbufs_shared(struct nvc0_context *nvc0)
address = nouveau_scratch_data(&nvc0->base, nvc0->vtxbuf[b].user_buffer,
base, size, &bo);
if (bo)
- BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, bo_flags, bo);
BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5);
PUSH_DATA (push, b);
@@ -368,7 +368,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
if (!(refd & (1 << b))) {
refd |= 1 << b;
- BCTX_REFN(nvc0->bufctx_3d, VTX, res, RD);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_VTX, res, RD);
}
}
if (nvc0->vbo_user)
@@ -412,7 +412,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
PUSH_DATAh(push, buf->address + limit);
PUSH_DATA (push, buf->address + limit);
- BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_VTX, buf, RD);
}
/* If there are more elements than buffers, we might not have unset
* fetching on the later elements.
@@ -435,7 +435,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
uint8_t vbo_mode;
bool update_vertex;
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX);
assert(vertex);
if (unlikely(vertex->need_conversion) ||
@@ -446,7 +446,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
}
const_vbos = vbo_mode ? 0 : nvc0->constant_vbos;
- update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) ||
+ update_vertex = (nvc0->dirty_3d & NVC0_NEW_3D_VERTEX) ||
(const_vbos != nvc0->state.constant_vbos) ||
(vbo_mode != nvc0->state.vbo_mode);
@@ -537,7 +537,7 @@ nvc0_idxbuf_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, buf->address + buf->base.width0 - 1);
PUSH_DATA (push, nvc0->idxbuf.index_size >> 1);
- BCTX_REFN(nvc0->bufctx_3d, IDX, buf, RD);
+ BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD);
}
#define NVC0_PRIM_GL_CASE(n) \
@@ -833,8 +833,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
/* Queue things up to let the macros write params to the driver constbuf */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
+ BEGIN_NVC0(push, NVC0_3D(CB_POS), 1);
+ PUSH_DATA (push, 256 + 128);
if (info->indexed) {
assert(nvc0->idxbuf.buffer);
@@ -947,12 +949,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->indexed && (nvc0->vb_elt_limit >= (info->count * 2));
/* Check whether we want to switch vertex-submission mode. */
- if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_ARRAYS | NVC0_NEW_VERTEX))) {
+ if (nvc0->vbo_user && !(nvc0->dirty_3d & (NVC0_NEW_3D_ARRAYS | NVC0_NEW_3D_VERTEX))) {
if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode)
if (nvc0->state.vbo_mode != 3)
- nvc0->dirty |= NVC0_NEW_ARRAYS;
+ nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS;
- if (!(nvc0->dirty & NVC0_NEW_ARRAYS) && nvc0->state.vbo_mode == 0) {
+ if (!(nvc0->dirty_3d & NVC0_NEW_3D_ARRAYS) && nvc0->state.vbo_mode == 0) {
if (nvc0->vertex->shared_slots)
nvc0_update_user_vbufs_shared(nvc0);
else
@@ -973,8 +975,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_SPACE(push, 9);
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
if (!info->indirect) {
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3);
PUSH_DATA (push, 256 + 128);
@@ -984,6 +986,14 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
}
+ if (nvc0->screen->base.class_3d < NVE4_3D_CLASS &&
+ nvc0->seamless_cube_map != nvc0->state.seamless_cube_map) {
+ nvc0->state.seamless_cube_map = nvc0->seamless_cube_map;
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(TEX_MISC),
+ nvc0->seamless_cube_map ? NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0);
+ }
+
push->kick_notify = nvc0_draw_vbo_kick_notify;
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
index 9c19ba20a7e..20b6742d8d7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -225,7 +225,7 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
PUSH_DATAh(push, va + size - 1);
PUSH_DATA (push, va + size - 1);
- BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
bo);
nouveau_pushbuf_validate(push);
@@ -554,7 +554,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
++ctx.instance_id;
}
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
nouveau_scratch_done(&nvc0->base);
} while (inst_count);
@@ -629,7 +629,7 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx,
data = (uint32_t *)nouveau_scratch_get(&nvc0->base,
info->count * index_size, &va, &bo);
- BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
bo);
nouveau_pushbuf_validate(push);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
index 79abe78b77a..4d07546c310 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
@@ -50,9 +50,9 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define NVC0_3D(n) SUBC_3D(NVC0_3D_##n)
#define NVE4_3D(n) SUBC_3D(NVE4_3D_##n)
-#define SUBC_COMPUTE(m) 1, (m)
-#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n)
-#define NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n)
+#define SUBC_CP(m) 1, (m)
+#define NVC0_CP(n) SUBC_CP(NVC0_COMPUTE_##n)
+#define NVE4_CP(n) SUBC_CP(NVE4_COMPUTE_##n)
#define SUBC_M2MF(m) 2, (m)
#define SUBC_P2MF(m) 2, (m)
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index d3e5676873e..652bc6d83d6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -39,7 +39,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
{
struct nouveau_device *dev = screen->base.device;
struct nouveau_object *chan = screen->base.channel;
- unsigned i;
+ int i;
int ret;
uint32_t obj_class;
@@ -51,6 +51,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
case 0xe0:
obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
break;
+ case 0x110:
+ obj_class = GM107_COMPUTE_CLASS;
+ break;
default:
NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
return -1;
@@ -68,21 +71,21 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
if (ret)
return ret;
- BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->compute->oclass);
- BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(TEMP_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->tls->offset);
PUSH_DATA (push, screen->tls->offset);
/* No idea why there are 2. Divide size by 2 to be safe.
* Actually this might be per-MP TEMP size and looks like I'm only using
* 2 MPs instead of all 8.
*/
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3);
+ BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(0)), 3);
PUSH_DATAh(push, screen->tls->size / screen->mp_count);
PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
PUSH_DATA (push, 0xff);
- BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3);
+ BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
PUSH_DATAh(push, screen->tls->size / screen->mp_count);
PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
PUSH_DATA (push, 0xff);
@@ -92,52 +95,53 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
* FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
* accessible. We cannot prevent that at the moment, so expect failure.
*/
- BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1);
+ BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
PUSH_DATA (push, 1 << 24);
- BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1);
+ BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
PUSH_DATA (push, 2 << 24);
- BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->text->offset);
PUSH_DATA (push, screen->text->offset);
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1);
+ BEGIN_NVC0(push, SUBC_CP(0x0310), 1);
PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
/* NOTE: these do not affect the state used by the 3D object */
- BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3);
+ BEGIN_NVC0(push, NVE4_CP(TIC_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->txc->offset);
PUSH_DATA (push, screen->txc->offset);
PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
- BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3);
+ BEGIN_NVC0(push, NVE4_CP(TSC_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->txc->offset + 65536);
PUSH_DATA (push, screen->txc->offset + 65536);
PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
if (obj_class >= NVF0_COMPUTE_CLASS) {
- BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1);
- PUSH_DATA (push, 0x100);
- BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63);
- for (i = 63; i >= 1; --i)
+ /* The blob calls GK110_COMPUTE.FIRMWARE[0x6], along with the args (0x1)
+ * passed with GK110_COMPUTE.GRAPH.SCRATCH[0x2]. This is currently
+ * disabled because our firmware doesn't support these commands and the
+ * GPU hangs if they are used. */
+ BEGIN_NIC0(push, SUBC_CP(0x0248), 64);
+ for (i = 63; i >= 0; i--)
PUSH_DATA(push, 0x38000 | i);
- IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
- IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0);
+ IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0);
}
- BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1);
+ BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
PUSH_DATA (push, 0); /* does not interefere with 3D */
- if (obj_class >= NVF0_COMPUTE_CLASS)
- IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+ if (obj_class == NVF0_COMPUTE_CLASS)
+ IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
/* MS sample coordinate offsets: these do not work with _ALT modes ! */
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 64);
PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATA (push, 0); /* 0 */
PUSH_DATA (push, 0);
@@ -157,13 +161,13 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATA (push, 1);
#ifdef DEBUG
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 28);
PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 8);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 8);
PUSH_DATA (push, 1);
PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
@@ -174,7 +178,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATA (push, 0); /* warp cfstack size */
#endif
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
return 0;
@@ -201,13 +205,13 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
* NVE4's surface load/store instructions receive all the information
* directly instead of via binding points, so we have to supply them.
*/
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 64);
PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);
@@ -223,7 +227,7 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
}
}
if (nvc0->surfaces_dirty[t]) {
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
@@ -252,7 +256,7 @@ nve4_compute_validate_samplers(struct nvc0_context *nvc0)
{
bool need_flush = nve4_validate_tsc(nvc0, 5);
if (need_flush) {
- BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1);
+ BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);
PUSH_DATA (nvc0->base.pushbuf, 0);
}
}
@@ -281,17 +285,17 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, address);
PUSH_DATA (push, address);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, n * 4);
PUSH_DATA (push, 0x1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + n);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
nvc0->textures_dirty[s] = 0;
@@ -338,29 +342,29 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
struct nvc0_program *cp = nvc0->compprog;
if (cp->parm_size) {
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset);
PUSH_DATA (push, screen->parm->offset);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, cp->parm_size);
PUSH_DATA (push, 0x1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, input, cp->parm_size / 4);
}
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 7 * 4);
PUSH_DATA (push, 0x1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, block_layout, 3);
PUSH_DATAp(push, grid_layout, 3);
PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
@@ -429,10 +433,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
}
void
-nve4_launch_grid(struct pipe_context *pipe,
- const uint *block_layout, const uint *grid_layout,
- uint32_t label,
- const void *input)
+nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -453,33 +454,34 @@ nve4_launch_grid(struct pipe_context *pipe,
if (ret)
goto out;
- nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
+ nve4_compute_setup_launch_desc(nvc0, desc, info->pc,
+ info->block, info->grid);
#ifdef DEBUG
if (debug_get_num_option("NV50_PROG_DEBUG", 0))
nve4_compute_dump_launch_desc(desc);
#endif
- nve4_compute_upload_input(nvc0, input, block_layout, grid_layout);
+ nve4_compute_upload_input(nvc0, info->input, info->block, info->grid);
/* upload descriptor and flush */
#if 0
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, desc_gpuaddr);
PUSH_DATA (push, desc_gpuaddr);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 256);
PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4));
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4));
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
- BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE);
#endif
- BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1);
+ BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1);
PUSH_DATA (push, desc_gpuaddr >> 8);
- BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1);
+ BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1);
PUSH_DATA (push, 0x3);
- BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);
out:
@@ -517,13 +519,13 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
PUSH_SPACE(push, 16);
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, txc->offset + (tic->id * 32));
PUSH_DATA (push, txc->offset + (tic->id * 32));
- BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, 32);
PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9);
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, &tic->tic[0], 8);
@@ -546,11 +548,11 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
if (n[0]) {
- BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]);
+ BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]);
PUSH_DATAp(push, commands[0], n[0]);
}
if (n[1]) {
- BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]);
+ BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]);
PUSH_DATAp(push, commands[1], n[1]);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
index 7364a68a579..84f8593b9b6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
@@ -2,7 +2,6 @@
#ifndef NVE4_COMPUTE_H
#define NVE4_COMPUTE_H
-#include "nv50/nv50_defs.xml.h"
#include "nvc0/nve4_compute.xml.h"
/* Input space is implemented as c0[], to which we bind the screen->parm bo.
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index a2b7f87855d..15a94d90721 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -319,11 +319,14 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
}
break;
case PIPE_SHADER_VERTEX:
@@ -378,11 +381,14 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
}
break;
}
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 56c7fb93f73..997e5f0e383 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -553,25 +553,24 @@ void evergreen_emit_cs_shader(
}
static void evergreen_launch_grid(
- struct pipe_context *ctx_,
- const uint *block_layout, const uint *grid_layout,
- uint32_t pc, const void *input)
+ struct pipe_context *ctx_, const struct pipe_grid_info *info)
{
struct r600_context *ctx = (struct r600_context *)ctx_;
#ifdef HAVE_OPENCL
struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
boolean use_kill;
- ctx->cs_shader_state.pc = pc;
+ ctx->cs_shader_state.pc = info->pc;
/* Get the config information for this kernel. */
- r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill);
+ r600_shader_binary_read_config(&shader->binary, &shader->bc,
+ info->pc, &use_kill);
#endif
- COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
+ COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc);
- evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
- compute_emit_cs(ctx, block_layout, grid_layout);
+ evergreen_compute_upload_input(ctx_, info->block, info->grid, info->input);
+ compute_emit_cs(ctx, info->block, info->grid);
}
static void evergreen_set_compute_resources(struct pipe_context * ctx_,
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 0fe7c74418d..7eab29c6eb4 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -784,12 +784,12 @@ LLVMModuleRef r600_tgsi_llvm(
{
struct tgsi_shader_info shader_info;
struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
- radeon_llvm_context_init(ctx);
+ radeon_llvm_context_init(ctx, "r600--");
LLVMTypeRef Arguments[32];
unsigned ArgumentsCount = 0;
for (unsigned i = 0; i < ctx->inputs_count; i++)
Arguments[ArgumentsCount++] = LLVMVectorType(bld_base->base.elem_type, 4);
- radeon_llvm_create_func(ctx, Arguments, ArgumentsCount);
+ radeon_llvm_create_func(ctx, NULL, 0, Arguments, ArgumentsCount);
for (unsigned i = 0; i < ctx->inputs_count; i++) {
LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
LLVMAddAttribute(P, LLVMInRegAttribute);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index c8580d807d7..7fb4108a188 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -532,6 +532,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
} else {
return PIPE_SHADER_IR_TGSI;
}
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_DOUBLES:
if (rscreen->b.family == CHIP_CYPRESS ||
rscreen->b.family == CHIP_CAYMAN || rscreen->b.family == CHIP_ARUBA)
@@ -541,6 +543,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
/* due to a bug in the shader compiler, some loops hang
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 324d2719f44..ea028272ccd 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -411,6 +411,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
{ "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
+ { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" },
DEBUG_NAMED_VALUE_END /* must be last */
};
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index e92df876c22..7df617737a7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -89,6 +89,7 @@
#define DBG_NO_DCC_CLEAR (1llu << 44)
#define DBG_NO_RB_PLUS (1llu << 45)
#define DBG_SI_SCHED (1llu << 46)
+#define DBG_MONOLITHIC_SHADERS (1llu << 47)
#define R600_MAP_BUFFER_ALIGNMENT 64
@@ -96,7 +97,7 @@ struct r600_common_context;
struct r600_perfcounters;
struct radeon_shader_reloc {
- char *name;
+ char name[32];
uint64_t offset;
};
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index af206e43860..1df0c300e85 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1293,6 +1293,7 @@ unsigned r600_translate_colorswap(enum pipe_format format)
break;
case 4:
/* check the middle channels, the 1st and 4th channel can be NONE */
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z))
return V_0280A0_SWAP_STD; /* XYZW */
else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y))
@@ -1301,6 +1302,16 @@ unsigned r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_ALT; /* ZYXW */
else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,Y))
return V_0280A0_SWAP_ALT_REV; /* WXYZ */
+#else
+ if (HAS_SWIZZLE(1,W) && HAS_SWIZZLE(2,X))
+ return V_0280A0_SWAP_STD; /* ZWXY */
+ else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,W))
+ return V_0280A0_SWAP_STD_REV; /* YXWZ */
+ else if (HAS_SWIZZLE(1,W) && HAS_SWIZZLE(2,Z))
+ return V_0280A0_SWAP_ALT; /* XWZY */
+ else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W))
+ return V_0280A0_SWAP_ALT_REV; /* YZWX */
+#endif
break;
}
return ~0U;
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c b/src/gallium/drivers/radeon/radeon_elf_util.c
index 2e45d439e7a..8aaa85d02f6 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.c
+++ b/src/gallium/drivers/radeon/radeon_elf_util.c
@@ -98,7 +98,8 @@ static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);
reloc->offset = rel.r_offset;
- reloc->name = strdup(symbol_name);
+ strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1);
+ reloc->name[sizeof(reloc->name)-1] = 0;
}
}
@@ -194,26 +195,3 @@ const unsigned char *radeon_shader_binary_config_start(
}
return binary->config;
}
-
-void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
- unsigned reloc_count)
-{
- unsigned i;
- for (i = 0; i < reloc_count; i++) {
- FREE(relocs[i].name);
- }
- FREE(relocs);
-}
-
-void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
- unsigned free_relocs)
-{
- FREE(binary->code);
- FREE(binary->config);
- FREE(binary->rodata);
-
- if (free_relocs) {
- radeon_shader_binary_free_relocs(binary->relocs,
- binary->reloc_count);
- }
-}
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.h b/src/gallium/drivers/radeon/radeon_elf_util.h
index ea4ab2f14b2..c2af9e0dfe0 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.h
+++ b/src/gallium/drivers/radeon/radeon_elf_util.h
@@ -47,18 +47,4 @@ const unsigned char *radeon_shader_binary_config_start(
const struct radeon_shader_binary *binary,
uint64_t symbol_offset);
-/**
- * Free all memory allocated for members of \p binary. This function does
- * not free \p binary.
- *
- * @param free_relocs If false, reolc information will not be freed.
- */
-void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
- unsigned free_relocs);
-
-/**
- * Free \p relocs and all member data.
- */
-void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
- unsigned reloc_count);
#endif /* RADEON_ELF_UTIL_H */
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index e967ad2214e..bdee2f8020a 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -113,6 +113,7 @@ struct radeon_llvm_context {
struct tgsi_declaration_range *arrays;
LLVMValueRef main_fn;
+ LLVMTypeRef return_type;
struct gallivm_state gallivm;
};
@@ -158,10 +159,12 @@ void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_bas
LLVMValueRef *coords_arg,
LLVMValueRef *derivs_arg);
-void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
+void radeon_llvm_context_init(struct radeon_llvm_context * ctx,
+ const char *triple);
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
- LLVMTypeRef *ParamTypes, unsigned ParamCount);
+ LLVMTypeRef *return_types, unsigned num_return_elems,
+ LLVMTypeRef *ParamTypes, unsigned ParamCount);
void radeon_llvm_dispose(struct radeon_llvm_context * ctx);
diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c
index 0dfd9ad4867..da19533b862 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_util.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
@@ -55,8 +55,10 @@ unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
static void radeon_llvm_optimize(LLVMModuleRef mod)
{
+#if HAVE_LLVM < 0x0309
const char *data_layout = LLVMGetDataLayout(mod);
LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
+#endif
LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
@@ -77,14 +79,18 @@ static void radeon_llvm_optimize(LLVMModuleRef mod)
}
}
+#if HAVE_LLVM < 0x0309
LLVMAddTargetData(TD, pass_manager);
+#endif
LLVMAddAlwaysInlinerPass(pass_manager);
LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
LLVMRunPassManager(pass_manager, mod);
LLVMPassManagerBuilderDispose(builder);
LLVMDisposePassManager(pass_manager);
+#if HAVE_LLVM < 0x0309
LLVMDisposeTargetData(TD);
+#endif
}
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index f5e3f6af1a0..c74397fb5c9 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -30,6 +30,7 @@
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_misc.h"
#include "gallivm/lp_bld_swizzle.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
@@ -1520,7 +1521,7 @@ static void emit_up2h(const struct lp_build_tgsi_action *action,
}
}
-void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
+void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *triple)
{
struct lp_type type;
@@ -1534,6 +1535,13 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
ctx->gallivm.context = LLVMContextCreate();
ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
ctx->gallivm.context);
+ LLVMSetTarget(ctx->gallivm.module,
+
+#if HAVE_LLVM < 0x0306
+ "r600--");
+#else
+ triple);
+#endif
ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
@@ -1693,14 +1701,22 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
}
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
+ LLVMTypeRef *return_types, unsigned num_return_elems,
LLVMTypeRef *ParamTypes, unsigned ParamCount)
{
- LLVMTypeRef main_fn_type;
+ LLVMTypeRef main_fn_type, ret_type;
LLVMBasicBlockRef main_fn_body;
+ if (num_return_elems)
+ ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
+ return_types,
+ num_return_elems, true);
+ else
+ ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
+
/* Setup the function */
- main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context),
- ParamTypes, ParamCount, 0);
+ ctx->return_type = ret_type;
+ main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
ctx->main_fn, "main_body");
@@ -1710,11 +1726,16 @@ void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
{
struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
+ const char *triple = LLVMGetTarget(gallivm->module);
+ LLVMTargetLibraryInfoRef target_library_info;
/* Create the pass manager */
gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
gallivm->module);
+ target_library_info = gallivm_create_target_library_info(triple);
+ LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
+
/* This pass should eliminate all the load and store instructions */
LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
@@ -1730,7 +1751,7 @@ void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
LLVMDisposeBuilder(gallivm->builder);
LLVMDisposePassManager(gallivm->passmgr);
-
+ gallivm_dispose_target_library_info(target_library_info);
}
void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 7370a113d3d..9f5f4c682bc 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -196,9 +196,7 @@ static unsigned compute_num_waves_for_scratch(
}
static void si_launch_grid(
- struct pipe_context *ctx,
- const uint *block_layout, const uint *grid_layout,
- uint32_t pc, const void *input)
+ struct pipe_context *ctx, const struct pipe_grid_info *info)
{
struct si_context *sctx = (struct si_context*)ctx;
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
@@ -232,7 +230,7 @@ static void si_launch_grid(
pm4->compute_pkt = true;
/* Read the config information */
- si_shader_binary_read_config(&shader->binary, &shader->config, pc);
+ si_shader_binary_read_config(&shader->binary, &shader->config, info->pc);
/* Upload the kernel arguments */
@@ -242,15 +240,16 @@ static void si_launch_grid(
kernel_args = sctx->b.ws->buffer_map(input_buffer->buf,
sctx->b.gfx.cs, PIPE_TRANSFER_WRITE);
for (i = 0; i < 3; i++) {
- kernel_args[i] = grid_layout[i];
- kernel_args[i + 3] = grid_layout[i] * block_layout[i];
- kernel_args[i + 6] = block_layout[i];
+ kernel_args[i] = info->grid[i];
+ kernel_args[i + 3] = info->grid[i] * info->block[i];
+ kernel_args[i + 6] = info->block[i];
}
num_waves_for_scratch = compute_num_waves_for_scratch(
- &sctx->screen->b.info, block_layout, grid_layout);
+ &sctx->screen->b.info, info->block, info->grid);
- memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size);
+ memcpy(kernel_args + (num_work_size_bytes / 4), info->input,
+ program->input_size);
if (shader->config.scratch_bytes_per_wave > 0) {
@@ -291,11 +290,11 @@ static void si_launch_grid(
si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0);
si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
- S_00B81C_NUM_THREAD_FULL(block_layout[0]));
+ S_00B81C_NUM_THREAD_FULL(info->block[0]));
si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
- S_00B820_NUM_THREAD_FULL(block_layout[1]));
+ S_00B820_NUM_THREAD_FULL(info->block[1]));
si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z,
- S_00B824_NUM_THREAD_FULL(block_layout[2]));
+ S_00B824_NUM_THREAD_FULL(info->block[2]));
/* Global buffers */
for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
@@ -323,7 +322,7 @@ static void si_launch_grid(
}
shader_va = shader->bo->gpu_address;
- shader_va += pc;
+ shader_va += info->pc;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo,
RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
@@ -375,9 +374,9 @@ static void si_launch_grid(
;
si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
- si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */
- si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */
- si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */
+ si_pm4_cmd_add(pm4, info->grid[0]); /* Thread groups DIM_X */
+ si_pm4_cmd_add(pm4, info->grid[1]); /* Thread groups DIM_Y */
+ si_pm4_cmd_add(pm4, info->grid[2]); /* Thread gropus DIM_Z */
si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */
si_pm4_cmd_end(pm4, false);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index e9d69d2db38..37fd4a25d59 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -22,6 +22,7 @@
*/
#include "si_pipe.h"
+#include "si_shader.h"
#include "si_public.h"
#include "sid.h"
@@ -448,6 +449,10 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_NATIVE;
+
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
+
case PIPE_SHADER_CAP_DOUBLES:
return HAVE_LLVM >= 0x0307;
@@ -511,6 +516,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_DOUBLES:
return HAVE_LLVM >= 0x0307;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
@@ -522,6 +529,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
}
return 0;
@@ -530,6 +538,14 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
static void si_destroy_screen(struct pipe_screen* pscreen)
{
struct si_screen *sscreen = (struct si_screen *)pscreen;
+ struct si_shader_part *parts[] = {
+ sscreen->vs_prologs,
+ sscreen->vs_epilogs,
+ sscreen->tcs_epilogs,
+ sscreen->ps_prologs,
+ sscreen->ps_epilogs
+ };
+ unsigned i;
if (!sscreen)
return;
@@ -537,6 +553,18 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
if (!sscreen->b.ws->unref(sscreen->b.ws))
return;
+ /* Free shader parts. */
+ for (i = 0; i < ARRAY_SIZE(parts); i++) {
+ while (parts[i]) {
+ struct si_shader_part *part = parts[i];
+
+ parts[i] = part->next;
+ radeon_shader_binary_clean(&part->binary);
+ FREE(part);
+ }
+ }
+ pipe_mutex_destroy(sscreen->shader_parts_mutex);
+ si_destroy_shader_cache(sscreen);
r600_destroy_common_screen(&sscreen->b);
}
@@ -584,7 +612,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
sscreen->b.b.resource_create = r600_resource_create_common;
if (!r600_common_screen_init(&sscreen->b, ws) ||
- !si_init_gs_info(sscreen)) {
+ !si_init_gs_info(sscreen) ||
+ !si_init_shader_cache(sscreen)) {
FREE(sscreen);
return NULL;
}
@@ -594,6 +623,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
+ pipe_mutex_init(sscreen->shader_parts_mutex);
+ sscreen->use_monolithic_shaders =
+ HAVE_LLVM < 0x0308 ||
+ (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;
if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b5790d6b564..ef860a58b83 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -80,10 +80,36 @@
#define SI_MAX_BORDER_COLORS 4096
struct si_compute;
+struct hash_table;
struct si_screen {
struct r600_common_screen b;
unsigned gs_table_depth;
+
+ /* Whether shaders are monolithic (1-part) or separate (3-part). */
+ bool use_monolithic_shaders;
+
+ pipe_mutex shader_parts_mutex;
+ struct si_shader_part *vs_prologs;
+ struct si_shader_part *vs_epilogs;
+ struct si_shader_part *tcs_epilogs;
+ struct si_shader_part *ps_prologs;
+ struct si_shader_part *ps_epilogs;
+
+ /* Shader cache in memory.
+ *
+ * Design & limitations:
+ * - The shader cache is per screen (= per process), never saved to
+ * disk, and skips redundant shader compilations from TGSI to bytecode.
+ * - It can only be used with one-variant-per-shader support, in which
+ * case only the main (typically middle) part of shaders is cached.
+ * - Only VS, TCS, TES, PS are cached, out of which only the hw VS
+ * variants of VS and TES are cached, so LS and ES aren't.
+ * - GS and CS aren't cached, but it's certainly possible to cache
+ * those as well.
+ */
+ pipe_mutex shader_cache_mutex;
+ struct hash_table *shader_cache;
};
struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index baa1090e2fb..57458ae1381 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -70,6 +70,12 @@ struct si_shader_context
unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
bool is_gs_copy_shader;
+
+ /* Whether to generate the optimized shader variant compiled as a whole
+ * (without a prolog and epilog)
+ */
+ bool is_monolithic;
+
int param_streamout_config;
int param_streamout_write_index;
int param_streamout_offset[4];
@@ -77,6 +83,7 @@ struct si_shader_context
int param_rel_auto_id;
int param_vs_prim_id;
int param_instance_id;
+ int param_vertex_index0;
int param_tes_u;
int param_tes_v;
int param_tes_rel_patch_id;
@@ -96,14 +103,17 @@ struct si_shader_context
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
LLVMValueRef gs_next_vertex[4];
+ LLVMValueRef return_value;
LLVMTypeRef voidt;
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i32;
+ LLVMTypeRef i64;
LLVMTypeRef i128;
LLVMTypeRef f32;
LLVMTypeRef v16i8;
+ LLVMTypeRef v2i32;
LLVMTypeRef v4i32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
@@ -118,9 +128,17 @@ static struct si_shader_context *si_shader_context(
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
struct si_shader *shader,
- LLVMTargetMachineRef tm,
- struct tgsi_shader_info *info);
+ LLVMTargetMachineRef tm);
+/* Ideally pass the sample mask input to the PS epilog as v13, which
+ * is its usual location, so that the shader doesn't have to add v_mov.
+ */
+#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13
+
+/* The VS location of the PrimitiveID input is the same in the epilog,
+ * so that the main shader part doesn't have to move it.
+ */
+#define VS_EPILOG_PRIMID_LOC 2
#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9
@@ -196,6 +214,10 @@ static LLVMValueRef unpack_param(struct si_shader_context *ctx,
LLVMValueRef value = LLVMGetParam(ctx->radeon_bld.main_fn,
param);
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
+ value = bitcast(&ctx->radeon_bld.soa.bld_base,
+ TGSI_TYPE_UNSIGNED, value);
+
if (rshift)
value = LLVMBuildLShr(gallivm->builder, value,
lp_build_const_int32(gallivm, rshift), "");
@@ -375,7 +397,7 @@ static LLVMValueRef build_indexed_load_const(
static LLVMValueRef get_instance_index_for_fetch(
struct radeon_llvm_context *radeon_bld,
- unsigned divisor)
+ unsigned param_start_instance, unsigned divisor)
{
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
@@ -389,8 +411,8 @@ static LLVMValueRef get_instance_index_for_fetch(
result = LLVMBuildUDiv(gallivm->builder, result,
lp_build_const_int32(gallivm, divisor), "");
- return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
- radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
+ return LLVMBuildAdd(gallivm->builder, result,
+ LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
}
static void declare_input_vs(
@@ -402,7 +424,8 @@ static void declare_input_vs(
struct gallivm_state *gallivm = base->gallivm;
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
- unsigned divisor = ctx->shader->key.vs.instance_divisors[input_index];
+ unsigned divisor =
+ ctx->shader->key.vs.prolog.instance_divisors[input_index];
unsigned chan;
@@ -424,10 +447,16 @@ static void declare_input_vs(
/* Build the attribute offset */
attribute_offset = lp_build_const_int32(gallivm, 0);
- if (divisor) {
+ if (!ctx->is_monolithic) {
+ buffer_index = LLVMGetParam(radeon_bld->main_fn,
+ ctx->param_vertex_index0 +
+ input_index);
+ } else if (divisor) {
/* Build index from instance ID, start instance and divisor */
- ctx->shader->uses_instanceid = true;
- buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, divisor);
+ ctx->shader->info.uses_instanceid = true;
+ buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld,
+ SI_PARAM_START_INSTANCE,
+ divisor);
} else {
/* Load the buffer index for vertices. */
LLVMValueRef vertex_id = LLVMGetParam(ctx->radeon_bld.main_fn,
@@ -853,7 +882,8 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
static unsigned select_interp_param(struct si_shader_context *ctx,
unsigned param)
{
- if (!ctx->shader->key.ps.force_persample_interp)
+ if (!ctx->shader->key.ps.prolog.force_persample_interp ||
+ !ctx->is_monolithic)
return param;
/* If the shader doesn't use center/centroid, just return the parameter.
@@ -923,7 +953,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
if (semantic_name == TGSI_SEMANTIC_COLOR &&
- ctx->shader->key.ps.color_two_side) {
+ ctx->shader->key.ps.prolog.color_two_side) {
LLVMValueRef args[4];
LLVMValueRef is_face_positive;
LLVMValueRef back_attr_number;
@@ -997,6 +1027,7 @@ static void declare_input_fs(
unsigned input_index,
const struct tgsi_full_declaration *decl)
{
+ struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
struct si_shader *shader = ctx->shader;
@@ -1004,6 +1035,26 @@ static void declare_input_fs(
LLVMValueRef interp_param = NULL;
int interp_param_idx;
+ /* Get colors from input VGPRs (set by the prolog). */
+ if (!ctx->is_monolithic &&
+ decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
+ unsigned i = decl->Semantic.Index;
+ unsigned colors_read = shader->selector->info.colors_read;
+ unsigned mask = colors_read >> (i * 4);
+ unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
+ (i ? util_bitcount(colors_read & 0xf) : 0);
+
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
+ mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
+ mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
+ mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
+ mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ return;
+ }
+
interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
decl->Interp.Location);
if (interp_param_idx == -1)
@@ -1330,12 +1381,12 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
const union si_shader_key *key = &ctx->shader->key;
- unsigned col_formats = key->ps.spi_shader_col_format;
+ unsigned col_formats = key->ps.epilog.spi_shader_col_format;
int cbuf = target - V_008DFC_SQ_EXP_MRT;
assert(cbuf >= 0 && cbuf < 8);
spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
- is_int8 = (key->ps.color_is_int8 >> cbuf) & 0x1;
+ is_int8 = (key->ps.epilog.color_is_int8 >> cbuf) & 0x1;
}
args[4] = uint->zero; /* COMPR flag */
@@ -1488,13 +1539,13 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- if (ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
+ if (ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
LLVMValueRef alpha_ref = LLVMGetParam(ctx->radeon_bld.main_fn,
SI_PARAM_ALPHA_REF);
LLVMValueRef alpha_pass =
lp_build_cmp(&bld_base->base,
- ctx->shader->key.ps.alpha_func,
+ ctx->shader->key.ps.epilog.alpha_func,
alpha, alpha_ref);
LLVMValueRef arg =
lp_build_select(&bld_base->base,
@@ -1511,7 +1562,8 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
}
static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef alpha)
+ LLVMValueRef alpha,
+ unsigned samplemask_param)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -1519,7 +1571,7 @@ static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *
/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
coverage = LLVMGetParam(ctx->radeon_bld.main_fn,
- SI_PARAM_SAMPLE_COVERAGE);
+ samplemask_param);
coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
@@ -1841,7 +1893,8 @@ handle_semantic:
case TGSI_SEMANTIC_COLOR:
case TGSI_SEMANTIC_BCOLOR:
target = V_008DFC_SQ_EXP_PARAM + param_count;
- shader->vs_output_param_offset[i] = param_count;
+ assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
+ shader->info.vs_output_param_offset[i] = param_count;
param_count++;
break;
case TGSI_SEMANTIC_CLIPDIST:
@@ -1855,7 +1908,8 @@ handle_semantic:
case TGSI_SEMANTIC_TEXCOORD:
case TGSI_SEMANTIC_GENERIC:
target = V_008DFC_SQ_EXP_PARAM + param_count;
- shader->vs_output_param_offset[i] = param_count;
+ assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
+ shader->info.vs_output_param_offset[i] = param_count;
param_count++;
break;
default:
@@ -1883,7 +1937,7 @@ handle_semantic:
}
}
- shader->nr_param_exports = param_count;
+ shader->info.nr_param_exports = param_count;
/* We need to add the position output manually if it's missing. */
if (!pos_args[0][0]) {
@@ -1945,7 +1999,7 @@ handle_semantic:
for (i = 0; i < 4; i++)
if (pos_args[i][0])
- shader->nr_pos_exports++;
+ shader->info.nr_pos_exports++;
pos_idx = 0;
for (i = 0; i < 4; i++) {
@@ -1955,7 +2009,7 @@ handle_semantic:
/* Specify the target we are exporting */
pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
- if (pos_idx == shader->nr_pos_exports)
+ if (pos_idx == shader->info.nr_pos_exports)
/* Specify that this is the last export */
pos_args[i][2] = uint->one;
@@ -1989,7 +2043,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
invocation_id, bld_base->uint_bld.zero, ""));
/* Determine the layout of one tess factor element in the buffer. */
- switch (shader->key.tcs.prim_mode) {
+ switch (shader->key.tcs.epilog.prim_mode) {
case PIPE_PRIM_LINES:
stride = 2; /* 2 dwords, 1 vec2 store */
outer_comps = 2;
@@ -2061,14 +2115,51 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef invocation_id;
+ LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
+ rel_patch_id = get_rel_patch_id(ctx);
invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
+ tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
- si_write_tess_factors(bld_base,
- get_rel_patch_id(ctx),
- invocation_id,
- get_tcs_out_current_patch_data_offset(ctx));
+ if (!ctx->is_monolithic) {
+ /* Return epilog parameters from this function. */
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef ret = ctx->return_value;
+ LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
+ unsigned vgpr;
+
+ /* RW_BUFFERS pointer */
+ rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
+ rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, "");
+ rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, "");
+ rw0 = LLVMBuildExtractElement(builder, rw_buffers,
+ bld_base->uint_bld.zero, "");
+ rw1 = LLVMBuildExtractElement(builder, rw_buffers,
+ bld_base->uint_bld.one, "");
+ ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
+ ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");
+
+ /* Tess factor buffer soffset is after user SGPRs. */
+ tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_TESS_FACTOR_OFFSET);
+ ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
+ SI_TCS_NUM_USER_SGPR, "");
+
+ /* VGPRs */
+ rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
+ invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
+ tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
+
+ vgpr = SI_TCS_NUM_USER_SGPR + 1;
+ ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
+ ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
+ ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
+ ctx->return_value = ret;
+ return;
+ }
+
+ si_write_tess_factors(bld_base, rel_patch_id, invocation_id, tf_lds_offset);
}
static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
@@ -2214,16 +2305,26 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
"");
}
- /* Export PrimitiveID when PS needs it. */
- if (si_vs_exports_prim_id(ctx->shader)) {
- outputs[i].name = TGSI_SEMANTIC_PRIMID;
- outputs[i].sid = 0;
- outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- get_primitive_id(bld_base, 0));
- outputs[i].values[1] = bld_base->base.undef;
- outputs[i].values[2] = bld_base->base.undef;
- outputs[i].values[3] = bld_base->base.undef;
- i++;
+ if (ctx->is_monolithic) {
+ /* Export PrimitiveID when PS needs it. */
+ if (si_vs_exports_prim_id(ctx->shader)) {
+ outputs[i].name = TGSI_SEMANTIC_PRIMID;
+ outputs[i].sid = 0;
+ outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ get_primitive_id(bld_base, 0));
+ outputs[i].values[1] = bld_base->base.undef;
+ outputs[i].values[2] = bld_base->base.undef;
+ outputs[i].values[3] = bld_base->base.undef;
+ i++;
+ }
+ } else {
+ /* Return the primitive ID from the LLVM function. */
+ ctx->return_value =
+ LLVMBuildInsertValue(gallivm->builder,
+ ctx->return_value,
+ bitcast(bld_base, TGSI_TYPE_FLOAT,
+ get_primitive_id(bld_base, 0)),
+ VS_EPILOG_PRIMID_LOC, "");
}
si_llvm_export_vs(bld_base, outputs, i);
@@ -2284,6 +2385,7 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
LLVMValueRef *color, unsigned index,
+ unsigned samplemask_param,
bool is_last)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
@@ -2291,30 +2393,31 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
int i;
/* Clamp color */
- if (ctx->shader->key.ps.clamp_color)
+ if (ctx->shader->key.ps.epilog.clamp_color)
for (i = 0; i < 4; i++)
color[i] = radeon_llvm_saturate(bld_base, color[i]);
/* Alpha to one */
- if (ctx->shader->key.ps.alpha_to_one)
+ if (ctx->shader->key.ps.epilog.alpha_to_one)
color[3] = base->one;
/* Alpha test */
if (index == 0 &&
- ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
+ ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
si_alpha_test(bld_base, color[3]);
/* Line & polygon smoothing */
- if (ctx->shader->key.ps.poly_line_smoothing)
- color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);
+ if (ctx->shader->key.ps.epilog.poly_line_smoothing)
+ color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3],
+ samplemask_param);
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
- if (ctx->shader->key.ps.last_cbuf > 0) {
+ if (ctx->shader->key.ps.epilog.last_cbuf > 0) {
LLVMValueRef args[8][9];
int c, last = -1;
/* Get the export arguments, also find out what the last one is. */
- for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) {
+ for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) {
si_llvm_init_export_args(bld_base, color,
V_008DFC_SQ_EXP_MRT + c, args[c]);
if (args[c][0] != bld_base->uint_bld.zero)
@@ -2322,7 +2425,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
}
/* Emit all exports. */
- for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) {
+ for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) {
if (is_last && last == c) {
args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
args[c][2] = bld_base->uint_bld.one; /* DONE bit */
@@ -2385,11 +2488,11 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
* Otherwise, find the last color export.
*/
if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) {
- unsigned spi_format = shader->key.ps.spi_shader_col_format;
+ unsigned spi_format = shader->key.ps.epilog.spi_shader_col_format;
/* Don't export NULL and return if alpha-test is enabled. */
- if (shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS &&
- shader->key.ps.alpha_func != PIPE_FUNC_NEVER &&
+ if (shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS &&
+ shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER &&
(spi_format & 0xf) == 0)
spi_format |= V_028714_SPI_SHADER_32_AR;
@@ -2400,10 +2503,10 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
continue;
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
- if (shader->key.ps.last_cbuf > 0) {
+ if (shader->key.ps.epilog.last_cbuf > 0) {
/* Just set this if any of the colorbuffers are enabled. */
if (spi_format &
- ((1llu << (4 * (shader->key.ps.last_cbuf + 1))) - 1))
+ ((1llu << (4 * (shader->key.ps.epilog.last_cbuf + 1))) - 1))
last_color_export = i;
continue;
}
@@ -2445,6 +2548,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
ctx->radeon_bld.soa.outputs[i][j], "");
si_export_mrt_color(bld_base, color, semantic_index,
+ SI_PARAM_SAMPLE_COVERAGE,
last_color_export == i);
break;
default:
@@ -2458,6 +2562,100 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
si_export_mrt_z(bld_base, depth, stencil, samplemask);
}
+/**
+ * Return PS outputs in this order:
+ *
+ * v[0:3] = color0.xyzw
+ * v[4:7] = color1.xyzw
+ * ...
+ * vN+0 = Depth
+ * vN+1 = Stencil
+ * vN+2 = SampleMask
+ * vN+3 = SampleMaskIn (used for OpenGL smoothing)
+ *
+ * The alpha-ref SGPR is returned via its original location.
+ */
+static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct si_shader *shader = ctx->shader;
+ struct lp_build_context *base = &bld_base->base;
+ struct tgsi_shader_info *info = &shader->selector->info;
+ LLVMBuilderRef builder = base->gallivm->builder;
+ unsigned i, j, first_vgpr, vgpr;
+
+ LLVMValueRef color[8][4] = {};
+ LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+ LLVMValueRef ret;
+
+ /* Read the output values. */
+ for (i = 0; i < info->num_outputs; i++) {
+ unsigned semantic_name = info->output_semantic_name[i];
+ unsigned semantic_index = info->output_semantic_index[i];
+
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_COLOR:
+ assert(semantic_index < 8);
+ for (j = 0; j < 4; j++) {
+ LLVMValueRef ptr = ctx->radeon_bld.soa.outputs[i][j];
+ LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");
+ color[semantic_index][j] = result;
+ }
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ depth = LLVMBuildLoad(builder,
+ ctx->radeon_bld.soa.outputs[i][2], "");
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ stencil = LLVMBuildLoad(builder,
+ ctx->radeon_bld.soa.outputs[i][1], "");
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ samplemask = LLVMBuildLoad(builder,
+ ctx->radeon_bld.soa.outputs[i][0], "");
+ break;
+ default:
+ fprintf(stderr, "Warning: SI unhandled fs output type:%d\n",
+ semantic_name);
+ }
+ }
+
+ /* Fill the return structure. */
+ ret = ctx->return_value;
+
+ /* Set SGPRs. */
+ ret = LLVMBuildInsertValue(builder, ret,
+ bitcast(bld_base, TGSI_TYPE_SIGNED,
+ LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_ALPHA_REF)),
+ SI_SGPR_ALPHA_REF, "");
+
+ /* Set VGPRs */
+ first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1;
+ for (i = 0; i < ARRAY_SIZE(color); i++) {
+ if (!color[i][0])
+ continue;
+
+ for (j = 0; j < 4; j++)
+ ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, "");
+ }
+ if (depth)
+ ret = LLVMBuildInsertValue(builder, ret, depth, vgpr++, "");
+ if (stencil)
+ ret = LLVMBuildInsertValue(builder, ret, stencil, vgpr++, "");
+ if (samplemask)
+ ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, "");
+
+ /* Add the input sample mask for smoothing at the end. */
+ if (vgpr < first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC)
+ vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC;
+ ret = LLVMBuildInsertValue(builder, ret,
+ LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_SAMPLE_COVERAGE), vgpr++, "");
+
+ ctx->return_value = ret;
+}
+
static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data);
@@ -2536,13 +2734,12 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
/**
* Load an image view, fmask view. or sampler state descriptor.
*/
-static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx,
- LLVMValueRef index, enum desc_type type)
+static LLVMValueRef get_sampler_desc_custom(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum desc_type type)
{
struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
- SI_PARAM_SAMPLERS);
switch (type) {
case DESC_IMAGE:
@@ -2558,12 +2755,21 @@ static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx,
/* The sampler state is at [12:15]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
- ptr = LLVMBuildPointerCast(builder, ptr,
- const_array(ctx->v4i32, 0), "");
+ list = LLVMBuildPointerCast(builder, list,
+ const_array(ctx->v4i32, 0), "");
break;
}
- return build_indexed_load_const(ctx, ptr, index);
+ return build_indexed_load_const(ctx, list, index);
+}
+
+static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx,
+ LLVMValueRef index, enum desc_type type)
+{
+ LLVMValueRef list = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_SAMPLERS);
+
+ return get_sampler_desc_custom(ctx, list, index, type);
}
static void tex_fetch_ptrs(
@@ -3546,6 +3752,30 @@ static const struct lp_build_tgsi_action interp_action = {
.emit = build_interp_intrinsic,
};
+static void si_create_function(struct si_shader_context *ctx,
+ LLVMTypeRef *returns, unsigned num_returns,
+ LLVMTypeRef *params, unsigned num_params,
+ int last_array_pointer, int last_sgpr)
+{
+ int i;
+
+ radeon_llvm_create_func(&ctx->radeon_bld, returns, num_returns,
+ params, num_params);
+ radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type);
+ ctx->return_value = LLVMGetUndef(ctx->radeon_bld.return_type);
+
+ for (i = 0; i <= last_sgpr; ++i) {
+ LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i);
+
+ /* We tell llvm that array inputs are passed by value to allow Sinking pass
+ * to move load. Inputs are constant so this is fine. */
+ if (i <= last_array_pointer)
+ LLVMAddAttribute(P, LLVMByValAttribute);
+ else
+ LLVMAddAttribute(P, LLVMInRegAttribute);
+ }
+}
+
static void create_meta_data(struct si_shader_context *ctx)
{
struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm;
@@ -3579,15 +3809,57 @@ static void declare_streamout_params(struct si_shader_context *ctx,
}
}
+static unsigned llvm_get_type_size(LLVMTypeRef type)
+{
+ LLVMTypeKind kind = LLVMGetTypeKind(type);
+
+ switch (kind) {
+ case LLVMIntegerTypeKind:
+ return LLVMGetIntTypeWidth(type) / 8;
+ case LLVMFloatTypeKind:
+ return 4;
+ case LLVMPointerTypeKind:
+ return 8;
+ case LLVMVectorTypeKind:
+ return LLVMGetVectorSize(type) *
+ llvm_get_type_size(LLVMGetElementType(type));
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void declare_tess_lds(struct si_shader_context *ctx)
+{
+ struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+ LLVMTypeRef i32 = ctx->radeon_bld.soa.bld_base.uint_bld.elem_type;
+
+ /* This is the upper bound, maximum is 32 inputs times 32 vertices */
+ unsigned vertex_data_dw_size = 32*32*4;
+ unsigned patch_data_dw_size = 32*4;
+ /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
+ unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
+ unsigned lds_dwords = patch_dw_size;
+
+ /* The actual size is computed outside of the shader to reduce
+ * the number of shader variants. */
+ ctx->lds =
+ LLVMAddGlobalInAddressSpace(gallivm->module,
+ LLVMArrayType(i32, lds_dwords),
+ "tess_lds",
+ LOCAL_ADDR_SPACE);
+}
+
static void create_function(struct si_shader_context *ctx)
{
struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct si_shader *shader = ctx->shader;
- LLVMTypeRef params[SI_NUM_PARAMS], v2i32, v3i32;
- unsigned i, last_array_pointer, last_sgpr, num_params;
+ LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
+ LLVMTypeRef returns[16+32*4];
+ unsigned i, last_array_pointer, last_sgpr, num_params, num_return_sgprs;
+ unsigned num_returns = 0;
- v2i32 = LLVMVectorType(ctx->i32, 2);
v3i32 = LLVMVectorType(ctx->i32, 3);
params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
@@ -3630,6 +3902,20 @@ static void create_function(struct si_shader_context *ctx)
params[ctx->param_rel_auto_id = num_params++] = ctx->i32;
params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
params[ctx->param_instance_id = num_params++] = ctx->i32;
+
+ if (!ctx->is_monolithic &&
+ !ctx->is_gs_copy_shader) {
+ /* Vertex load indices. */
+ ctx->param_vertex_index0 = num_params;
+
+ for (i = 0; i < shader->selector->info.num_inputs; i++)
+ params[num_params++] = ctx->i32;
+
+ /* PrimitiveID output. */
+ if (!shader->key.vs.as_es && !shader->key.vs.as_ls)
+ for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
+ returns[num_returns++] = ctx->f32;
+ }
break;
case TGSI_PROCESSOR_TESS_CTRL:
@@ -3643,6 +3929,15 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_PATCH_ID] = ctx->i32;
params[SI_PARAM_REL_IDS] = ctx->i32;
num_params = SI_PARAM_REL_IDS+1;
+
+ if (!ctx->is_monolithic) {
+ /* PARAM_TESS_FACTOR_OFFSET is after user SGPRs. */
+ for (i = 0; i <= SI_TCS_NUM_USER_SGPR; i++)
+ returns[num_returns++] = ctx->i32; /* SGPRs */
+
+ for (i = 0; i < 3; i++)
+ returns[num_returns++] = ctx->f32; /* VGPRs */
+ }
break;
case TGSI_PROCESSOR_TESS_EVAL:
@@ -3663,6 +3958,11 @@ static void create_function(struct si_shader_context *ctx)
params[ctx->param_tes_v = num_params++] = ctx->f32;
params[ctx->param_tes_rel_patch_id = num_params++] = ctx->i32;
params[ctx->param_tes_patch_id = num_params++] = ctx->i32;
+
+ /* PrimitiveID output. */
+ if (!ctx->is_monolithic && !shader->key.tes.as_es)
+ for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
+ returns[num_returns++] = ctx->f32;
break;
case TGSI_PROCESSOR_GEOMETRY:
@@ -3686,13 +3986,13 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_ALPHA_REF] = ctx->f32;
params[SI_PARAM_PRIM_MASK] = ctx->i32;
last_sgpr = SI_PARAM_PRIM_MASK;
- params[SI_PARAM_PERSP_SAMPLE] = v2i32;
- params[SI_PARAM_PERSP_CENTER] = v2i32;
- params[SI_PARAM_PERSP_CENTROID] = v2i32;
+ params[SI_PARAM_PERSP_SAMPLE] = ctx->v2i32;
+ params[SI_PARAM_PERSP_CENTER] = ctx->v2i32;
+ params[SI_PARAM_PERSP_CENTROID] = ctx->v2i32;
params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
- params[SI_PARAM_LINEAR_SAMPLE] = v2i32;
- params[SI_PARAM_LINEAR_CENTER] = v2i32;
- params[SI_PARAM_LINEAR_CENTROID] = v2i32;
+ params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32;
+ params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32;
+ params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32;
params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32;
params[SI_PARAM_POS_X_FLOAT] = ctx->f32;
params[SI_PARAM_POS_Y_FLOAT] = ctx->f32;
@@ -3701,8 +4001,39 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_FRONT_FACE] = ctx->i32;
params[SI_PARAM_ANCILLARY] = ctx->i32;
params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
- params[SI_PARAM_POS_FIXED_PT] = ctx->f32;
+ params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
num_params = SI_PARAM_POS_FIXED_PT+1;
+
+ if (!ctx->is_monolithic) {
+ /* Color inputs from the prolog. */
+ if (shader->selector->info.colors_read) {
+ unsigned num_color_elements =
+ util_bitcount(shader->selector->info.colors_read);
+
+ assert(num_params + num_color_elements <= ARRAY_SIZE(params));
+ for (i = 0; i < num_color_elements; i++)
+ params[num_params++] = ctx->f32;
+ }
+
+ /* Outputs for the epilog. */
+ num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
+ num_returns =
+ num_return_sgprs +
+ util_bitcount(shader->selector->info.colors_written) * 4 +
+ shader->selector->info.writes_z +
+ shader->selector->info.writes_stencil +
+ shader->selector->info.writes_samplemask +
+ 1 /* SampleMaskIn */;
+
+ num_returns = MAX2(num_returns,
+ num_return_sgprs +
+ PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
+
+ for (i = 0; i < num_return_sgprs; i++)
+ returns[i] = ctx->i32;
+ for (; i < num_returns; i++)
+ returns[i] = ctx->f32;
+ }
break;
default:
@@ -3711,20 +4042,38 @@ static void create_function(struct si_shader_context *ctx)
}
assert(num_params <= Elements(params));
- radeon_llvm_create_func(&ctx->radeon_bld, params, num_params);
- radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type);
-
- for (i = 0; i <= last_sgpr; ++i) {
- LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i);
- /* We tell llvm that array inputs are passed by value to allow Sinking pass
- * to move load. Inputs are constant so this is fine. */
- if (i <= last_array_pointer)
- LLVMAddAttribute(P, LLVMByValAttribute);
- else
- LLVMAddAttribute(P, LLVMInRegAttribute);
+ si_create_function(ctx, returns, num_returns, params,
+ num_params, last_array_pointer, last_sgpr);
+
+ /* Reserve register locations for VGPR inputs the PS prolog may need. */
+ if (ctx->type == TGSI_PROCESSOR_FRAGMENT &&
+ !ctx->is_monolithic) {
+ radeon_llvm_add_attribute(ctx->radeon_bld.main_fn,
+ "InitialPSInputAddr",
+ S_0286D0_PERSP_SAMPLE_ENA(1) |
+ S_0286D0_PERSP_CENTER_ENA(1) |
+ S_0286D0_PERSP_CENTROID_ENA(1) |
+ S_0286D0_LINEAR_SAMPLE_ENA(1) |
+ S_0286D0_LINEAR_CENTER_ENA(1) |
+ S_0286D0_LINEAR_CENTROID_ENA(1) |
+ S_0286D0_FRONT_FACE_ENA(1) |
+ S_0286D0_POS_FIXED_PT_ENA(1));
}
+ shader->info.num_input_sgprs = 0;
+ shader->info.num_input_vgprs = 0;
+
+ for (i = 0; i <= last_sgpr; ++i)
+ shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
+
+ /* Unused fragment shader inputs are eliminated by the compiler,
+ * so we don't know yet how many there will be.
+ */
+ if (ctx->type != TGSI_PROCESSOR_FRAGMENT)
+ for (; i < num_params; ++i)
+ shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
+
if (bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
@@ -3740,22 +4089,8 @@ static void create_function(struct si_shader_context *ctx)
if ((ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) ||
ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
- ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
- /* This is the upper bound, maximum is 32 inputs times 32 vertices */
- unsigned vertex_data_dw_size = 32*32*4;
- unsigned patch_data_dw_size = 32*4;
- /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
- unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
- unsigned lds_dwords = patch_dw_size;
-
- /* The actual size is computed outside of the shader to reduce
- * the number of shader variants. */
- ctx->lds =
- LLVMAddGlobalInAddressSpace(gallivm->module,
- LLVMArrayType(ctx->i32, lds_dwords),
- "tess_lds",
- LOCAL_ADDR_SPACE);
- }
+ ctx->type == TGSI_PROCESSOR_TESS_EVAL)
+ declare_tess_lds(ctx);
}
static void preload_constants(struct si_shader_context *ctx)
@@ -3887,6 +4222,49 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
}
}
+static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
+ LLVMValueRef param_sampler_views,
+ unsigned param_pos_fixed_pt)
+{
+ struct lp_build_tgsi_context *bld_base =
+ &ctx->radeon_bld.soa.bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_build_emit_data result = {};
+ struct tgsi_full_instruction inst = {};
+ LLVMValueRef desc, sampler_index, address[2], pix;
+
+ /* Use the fixed-point gl_FragCoord input.
+ * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
+ * per coordinate to get the repeating effect.
+ */
+ address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5);
+ address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5);
+
+ /* Load the sampler view descriptor. */
+ sampler_index = lp_build_const_int32(gallivm, SI_POLY_STIPPLE_SAMPLER);
+ desc = get_sampler_desc_custom(ctx, param_sampler_views,
+ sampler_index, DESC_IMAGE);
+
+ /* Load the texel. */
+ inst.Instruction.Opcode = TGSI_OPCODE_TXF;
+ inst.Texture.Texture = TGSI_TEXTURE_2D_MSAA; /* = use load, not load_mip */
+ result.inst = &inst;
+ set_tex_fetch_args(ctx, &result, TGSI_OPCODE_TXF,
+ inst.Texture.Texture,
+ desc, NULL, address, ARRAY_SIZE(address), 0xf);
+ build_tex_intrinsic(&tex_action, bld_base, &result);
+
+ /* Kill the thread accordingly. */
+ pix = LLVMBuildExtractElement(gallivm->builder, result.output[0],
+ lp_build_const_int32(gallivm, 3), "");
+ pix = bitcast(bld_base, TGSI_TYPE_FLOAT, pix);
+ pix = LLVMBuildFNeg(gallivm->builder, pix, "");
+
+ lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+ LLVMVoidTypeInContext(gallivm->context),
+ &pix, 1, 0);
+}
+
void si_shader_binary_read_config(struct radeon_shader_binary *binary,
struct si_shader_config *conf,
unsigned symbol_offset)
@@ -3972,41 +4350,70 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
}
}
+static unsigned si_get_shader_binary_size(struct si_shader *shader)
+{
+ unsigned size = shader->binary.code_size;
+
+ if (shader->prolog)
+ size += shader->prolog->binary.code_size;
+ if (shader->epilog)
+ size += shader->epilog->binary.code_size;
+ return size;
+}
+
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
{
- const struct radeon_shader_binary *binary = &shader->binary;
- unsigned code_size = binary->code_size + binary->rodata_size;
+ const struct radeon_shader_binary *prolog =
+ shader->prolog ? &shader->prolog->binary : NULL;
+ const struct radeon_shader_binary *epilog =
+ shader->epilog ? &shader->epilog->binary : NULL;
+ const struct radeon_shader_binary *mainb = &shader->binary;
+ unsigned bo_size = si_get_shader_binary_size(shader) +
+ (!epilog ? mainb->rodata_size : 0);
unsigned char *ptr;
+ assert(!prolog || !prolog->rodata_size);
+ assert((!prolog && !epilog) || !mainb->rodata_size);
+ assert(!epilog || !epilog->rodata_size);
+
r600_resource_reference(&shader->bo, NULL);
shader->bo = si_resource_create_custom(&sscreen->b.b,
PIPE_USAGE_IMMUTABLE,
- code_size);
+ bo_size);
if (!shader->bo)
return -ENOMEM;
+ /* Upload. */
ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE);
- util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
- if (binary->rodata_size > 0) {
- ptr += binary->code_size;
- util_memcpy_cpu_to_le32(ptr, binary->rodata,
- binary->rodata_size);
+
+ if (prolog) {
+ util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size);
+ ptr += prolog->code_size;
}
+ util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
+ ptr += mainb->code_size;
+
+ if (epilog)
+ util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size);
+ else if (mainb->rodata_size > 0)
+ util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size);
+
sscreen->b.ws->buffer_unmap(shader->bo->buf);
return 0;
}
static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary,
- struct pipe_debug_callback *debug)
+ struct pipe_debug_callback *debug,
+ const char *name)
{
char *line, *p;
unsigned i, count;
if (binary->disasm_string) {
- fprintf(stderr, "\nShader Disassembly:\n\n");
- fprintf(stderr, "%s\n", binary->disasm_string);
+ fprintf(stderr, "Shader %s disassembly:\n", name);
+ fprintf(stderr, "%s", binary->disasm_string);
if (debug && debug->debug_message) {
/* Very long debug messages are cut off, so send the
@@ -4036,7 +4443,7 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary
"Shader Disassembly End");
}
} else {
- fprintf(stderr, "SI CODE:\n");
+ fprintf(stderr, "Shader %s binary:\n", name);
for (i = 0; i < binary->code_size; i += 4) {
fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i,
binary->code[i + 3], binary->code[i + 2],
@@ -4115,16 +4522,60 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
max_simd_waves);
}
+static const char *si_get_shader_name(struct si_shader *shader,
+ unsigned processor)
+{
+ switch (processor) {
+ case TGSI_PROCESSOR_VERTEX:
+ if (shader->key.vs.as_es)
+ return "Vertex Shader as ES";
+ else if (shader->key.vs.as_ls)
+ return "Vertex Shader as LS";
+ else
+ return "Vertex Shader as VS";
+ case TGSI_PROCESSOR_TESS_CTRL:
+ return "Tessellation Control Shader";
+ case TGSI_PROCESSOR_TESS_EVAL:
+ if (shader->key.tes.as_es)
+ return "Tessellation Evaluation Shader as ES";
+ else
+ return "Tessellation Evaluation Shader as VS";
+ case TGSI_PROCESSOR_GEOMETRY:
+ if (shader->gs_copy_shader == NULL)
+ return "GS Copy Shader as VS";
+ else
+ return "Geometry Shader";
+ case TGSI_PROCESSOR_FRAGMENT:
+ return "Pixel Shader";
+ case TGSI_PROCESSOR_COMPUTE:
+ return "Compute Shader";
+ default:
+ return "Unknown Shader";
+ }
+}
+
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor)
{
- if (r600_can_dump_shader(&sscreen->b, processor))
- if (!(sscreen->b.debug_flags & DBG_NO_ASM))
- si_shader_dump_disassembly(&shader->binary, debug);
+ if (r600_can_dump_shader(&sscreen->b, processor) &&
+ !(sscreen->b.debug_flags & DBG_NO_ASM)) {
+ fprintf(stderr, "\n%s:\n", si_get_shader_name(shader, processor));
+
+ if (shader->prolog)
+ si_shader_dump_disassembly(&shader->prolog->binary,
+ debug, "prolog");
+
+ si_shader_dump_disassembly(&shader->binary, debug, "main");
+
+ if (shader->epilog)
+ si_shader_dump_disassembly(&shader->epilog->binary,
+ debug, "epilog");
+ fprintf(stderr, "\n");
+ }
si_shader_dump_stats(sscreen, &shader->config,
shader->selector ? shader->selector->info.num_inputs : 0,
- shader->binary.code_size, debug, processor);
+ si_get_shader_binary_size(shader), debug, processor);
}
int si_compile_llvm(struct si_screen *sscreen,
@@ -4177,6 +4628,19 @@ int si_compile_llvm(struct si_screen *sscreen,
FREE(binary->global_symbol_offsets);
binary->config = NULL;
binary->global_symbol_offsets = NULL;
+
+ /* Some shaders can't have rodata because their binaries can be
+ * concatenated.
+ */
+ if (binary->rodata_size &&
+ (processor == TGSI_PROCESSOR_VERTEX ||
+ processor == TGSI_PROCESSOR_TESS_CTRL ||
+ processor == TGSI_PROCESSOR_TESS_EVAL ||
+ processor == TGSI_PROCESSOR_FRAGMENT)) {
+ fprintf(stderr, "radeonsi: The shader can't have rodata.");
+ return -EINVAL;
+ }
+
return r;
}
@@ -4196,7 +4660,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
- si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm, gsinfo);
+ si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm);
ctx->type = TGSI_PROCESSOR_VERTEX;
ctx->is_gs_copy_shader = true;
@@ -4241,7 +4705,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
- LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+ LLVMBuildRet(gallivm->builder, ctx->return_value);
/* Dump LLVM IR before any optimization passes */
if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
@@ -4278,35 +4742,38 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
switch (shader) {
case PIPE_SHADER_VERTEX:
fprintf(f, " instance_divisors = {");
- for (i = 0; i < Elements(key->vs.instance_divisors); i++)
+ for (i = 0; i < Elements(key->vs.prolog.instance_divisors); i++)
fprintf(f, !i ? "%u" : ", %u",
- key->vs.instance_divisors[i]);
+ key->vs.prolog.instance_divisors[i]);
fprintf(f, "}\n");
fprintf(f, " as_es = %u\n", key->vs.as_es);
fprintf(f, " as_ls = %u\n", key->vs.as_ls);
- fprintf(f, " export_prim_id = %u\n", key->vs.export_prim_id);
+ fprintf(f, " export_prim_id = %u\n", key->vs.epilog.export_prim_id);
break;
case PIPE_SHADER_TESS_CTRL:
- fprintf(f, " prim_mode = %u\n", key->tcs.prim_mode);
+ fprintf(f, " prim_mode = %u\n", key->tcs.epilog.prim_mode);
break;
case PIPE_SHADER_TESS_EVAL:
fprintf(f, " as_es = %u\n", key->tes.as_es);
- fprintf(f, " export_prim_id = %u\n", key->tes.export_prim_id);
+ fprintf(f, " export_prim_id = %u\n", key->tes.epilog.export_prim_id);
break;
case PIPE_SHADER_GEOMETRY:
break;
case PIPE_SHADER_FRAGMENT:
- fprintf(f, " spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format);
- fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf);
- fprintf(f, " color_two_side = %u\n", key->ps.color_two_side);
- fprintf(f, " alpha_func = %u\n", key->ps.alpha_func);
- fprintf(f, " alpha_to_one = %u\n", key->ps.alpha_to_one);
- fprintf(f, " poly_stipple = %u\n", key->ps.poly_stipple);
- fprintf(f, " clamp_color = %u\n", key->ps.clamp_color);
+ fprintf(f, " prolog.color_two_side = %u\n", key->ps.prolog.color_two_side);
+ fprintf(f, " prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple);
+ fprintf(f, " prolog.force_persample_interp = %u\n", key->ps.prolog.force_persample_interp);
+ fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format);
+ fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8);
+ fprintf(f, " epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf);
+ fprintf(f, " epilog.alpha_func = %u\n", key->ps.epilog.alpha_func);
+ fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.epilog.alpha_to_one);
+ fprintf(f, " epilog.poly_line_smoothing = %u\n", key->ps.epilog.poly_line_smoothing);
+ fprintf(f, " epilog.clamp_color = %u\n", key->ps.epilog.clamp_color);
break;
default:
@@ -4317,13 +4784,12 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
struct si_shader *shader,
- LLVMTargetMachineRef tm,
- struct tgsi_shader_info *info)
+ LLVMTargetMachineRef tm)
{
struct lp_build_tgsi_context *bld_base;
memset(ctx, 0, sizeof(*ctx));
- radeon_llvm_context_init(&ctx->radeon_bld);
+ radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--");
ctx->tm = tm;
ctx->screen = sscreen;
if (shader && shader->selector)
@@ -4336,15 +4802,18 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context);
ctx->i8 = LLVMInt8TypeInContext(ctx->radeon_bld.gallivm.context);
ctx->i32 = LLVMInt32TypeInContext(ctx->radeon_bld.gallivm.context);
- ctx->i128 = LLVMInt128TypeInContext(ctx->radeon_bld.gallivm.context);
+ ctx->i64 = LLVMInt64TypeInContext(ctx->radeon_bld.gallivm.context);
+ ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128);
ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context);
ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
+ ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
bld_base = &ctx->radeon_bld.soa.bld_base;
- bld_base->info = info;
+ if (shader && shader->selector)
+ bld_base->info = &shader->selector->info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
@@ -4380,40 +4849,31 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
}
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
- struct si_shader *shader,
- struct pipe_debug_callback *debug)
+int si_compile_tgsi_shader(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ bool is_monolithic,
+ struct pipe_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
- struct tgsi_token *tokens = sel->tokens;
struct si_shader_context ctx;
struct lp_build_tgsi_context *bld_base;
- struct tgsi_shader_info stipple_shader_info;
LLVMModuleRef mod;
int r = 0;
- bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
- shader->key.ps.poly_stipple;
-
- if (poly_stipple) {
- tokens = util_pstipple_create_fragment_shader(tokens, NULL,
- SI_POLY_STIPPLE_SAMPLER,
- TGSI_FILE_SYSTEM_VALUE);
- tgsi_scan_shader(tokens, &stipple_shader_info);
- }
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
!(sscreen->b.debug_flags & DBG_NO_TGSI)) {
si_dump_shader_key(sel->type, &shader->key, stderr);
- tgsi_dump(tokens, 0);
+ tgsi_dump(sel->tokens, 0);
si_dump_streamout(&sel->so);
}
- si_init_shader_ctx(&ctx, sscreen, shader, tm,
- poly_stipple ? &stipple_shader_info : &sel->info);
+ si_init_shader_ctx(&ctx, sscreen, shader, tm);
+ ctx.is_monolithic = is_monolithic;
- shader->uses_instanceid = sel->info.uses_instanceid;
+ shader->info.uses_instanceid = sel->info.uses_instanceid;
bld_base = &ctx.radeon_bld.soa.bld_base;
ctx.radeon_bld.load_system_value = declare_system_value;
@@ -4447,7 +4907,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
break;
case TGSI_PROCESSOR_FRAGMENT:
ctx.radeon_bld.load_input = declare_input_fs;
- bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
+ if (is_monolithic)
+ bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
+ else
+ bld_base->emit_epilogue = si_llvm_return_fs_outputs;
break;
default:
assert(!"Unsupported shader type");
@@ -4461,6 +4924,14 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
preload_streamout_buffers(&ctx);
preload_ring_buffers(&ctx);
+ if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT &&
+ shader->key.ps.prolog.poly_stipple) {
+ LLVMValueRef views = LLVMGetParam(ctx.radeon_bld.main_fn,
+ SI_PARAM_SAMPLERS);
+ si_llvm_emit_polygon_stipple(&ctx, views,
+ SI_PARAM_POS_FIXED_PT);
+ }
+
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
int i;
for (i = 0; i < 4; i++) {
@@ -4470,12 +4941,12 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
}
}
- if (!lp_build_tgsi_llvm(bld_base, tokens)) {
+ if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
goto out;
}
- LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+ LLVMBuildRet(bld_base->base.gallivm->builder, ctx.return_value);
mod = bld_base->base.gallivm->module;
/* Dump LLVM IR before any optimization passes */
@@ -4492,16 +4963,49 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
goto out;
}
- si_shader_dump(sscreen, shader, debug, ctx.type);
+ radeon_llvm_dispose(&ctx.radeon_bld);
- r = si_shader_binary_upload(sscreen, shader);
- if (r) {
- fprintf(stderr, "LLVM failed to upload shader\n");
- goto out;
+ /* Calculate the number of fragment input VGPRs. */
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
+ shader->info.num_input_vgprs = 0;
+ shader->info.face_vgpr_index = -1;
+
+ if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_PERSP_PULL_MODEL_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 3;
+ if (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 2;
+ if (G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_FRONT_FACE_ENA(shader->config.spi_ps_input_addr)) {
+ shader->info.face_vgpr_index = shader->info.num_input_vgprs;
+ shader->info.num_input_vgprs += 1;
+ }
+ if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
+ if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
+ shader->info.num_input_vgprs += 1;
}
- radeon_llvm_dispose(&ctx.radeon_bld);
-
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
shader->gs_copy_shader->selector = shader->selector;
@@ -4517,11 +5021,968 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
out:
for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
FREE(ctx.constants[i]);
- if (poly_stipple)
- tgsi_free_tokens(tokens);
return r;
}
+/**
+ * Create, compile and return a shader part (prolog or epilog).
+ *
+ * \param sscreen screen
+ * \param list list of shader parts of the same category
+ * \param key shader part key
+ * \param tm LLVM target machine
+ * \param debug debug callback
+ * \param compile the callback responsible for compilation
+ * \return non-NULL on success
+ */
+static struct si_shader_part *
+si_get_shader_part(struct si_screen *sscreen,
+ struct si_shader_part **list,
+ union si_shader_part_key *key,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ bool (*compile)(struct si_screen *,
+ LLVMTargetMachineRef,
+ struct pipe_debug_callback *,
+ struct si_shader_part *))
+{
+ struct si_shader_part *result;
+
+ pipe_mutex_lock(sscreen->shader_parts_mutex);
+
+ /* Find existing. */
+ for (result = *list; result; result = result->next) {
+ if (memcmp(&result->key, key, sizeof(*key)) == 0) {
+ pipe_mutex_unlock(sscreen->shader_parts_mutex);
+ return result;
+ }
+ }
+
+ /* Compile a new one. */
+ result = CALLOC_STRUCT(si_shader_part);
+ result->key = *key;
+ if (!compile(sscreen, tm, debug, result)) {
+ FREE(result);
+ pipe_mutex_unlock(sscreen->shader_parts_mutex);
+ return NULL;
+ }
+
+ result->next = *list;
+ *list = result;
+ pipe_mutex_unlock(sscreen->shader_parts_mutex);
+ return result;
+}
+
+/**
+ * Create a vertex shader prolog.
+ *
+ * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values).
+ * All inputs are returned unmodified. The vertex load indices are
+ * stored after them, which will used by the API VS for fetching inputs.
+ *
+ * For example, the expected outputs for instance_divisors[] = {0, 1, 2} are:
+ * input_v0,
+ * input_v1,
+ * input_v2,
+ * input_v3,
+ * (VertexID + BaseVertex),
+ * (InstanceID + StartInstance),
+ * (InstanceID / 2 + StartInstance)
+ */
+static bool si_compile_vs_prolog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader shader = {};
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ LLVMTypeRef *params, *returns;
+ LLVMValueRef ret, func;
+ int last_sgpr, num_params, num_returns, i;
+ bool status = true;
+
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ ctx.type = TGSI_PROCESSOR_VERTEX;
+ ctx.param_vertex_id = key->vs_prolog.num_input_sgprs;
+ ctx.param_instance_id = key->vs_prolog.num_input_sgprs + 3;
+
+ /* 4 preloaded VGPRs + vertex load indices as prolog outputs */
+ params = alloca((key->vs_prolog.num_input_sgprs + 4) *
+ sizeof(LLVMTypeRef));
+ returns = alloca((key->vs_prolog.num_input_sgprs + 4 +
+ key->vs_prolog.last_input + 1) *
+ sizeof(LLVMTypeRef));
+ num_params = 0;
+ num_returns = 0;
+
+ /* Declare input and output SGPRs. */
+ num_params = 0;
+ for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
+ params[num_params++] = ctx.i32;
+ returns[num_returns++] = ctx.i32;
+ }
+ last_sgpr = num_params - 1;
+
+ /* 4 preloaded VGPRs (outputs must be floats) */
+ for (i = 0; i < 4; i++) {
+ params[num_params++] = ctx.i32;
+ returns[num_returns++] = ctx.f32;
+ }
+
+ /* Vertex load indices. */
+ for (i = 0; i <= key->vs_prolog.last_input; i++)
+ returns[num_returns++] = ctx.f32;
+
+ /* Create the function. */
+ si_create_function(&ctx, returns, num_returns, params,
+ num_params, -1, last_sgpr);
+ func = ctx.radeon_bld.main_fn;
+
+ /* Copy inputs to outputs. This should be no-op, as the registers match,
+ * but it will prevent the compiler from overwriting them unintentionally.
+ */
+ ret = ctx.return_value;
+ for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
+ LLVMValueRef p = LLVMGetParam(func, i);
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+ }
+ for (i = num_params - 4; i < num_params; i++) {
+ LLVMValueRef p = LLVMGetParam(func, i);
+ p = LLVMBuildBitCast(gallivm->builder, p, ctx.f32, "");
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+ }
+
+ /* Compute vertex load indices from instance divisors. */
+ for (i = 0; i <= key->vs_prolog.last_input; i++) {
+ unsigned divisor = key->vs_prolog.states.instance_divisors[i];
+ LLVMValueRef index;
+
+ if (divisor) {
+ /* InstanceID / Divisor + StartInstance */
+ index = get_instance_index_for_fetch(&ctx.radeon_bld,
+ SI_SGPR_START_INSTANCE,
+ divisor);
+ } else {
+ /* VertexID + BaseVertex */
+ index = LLVMBuildAdd(gallivm->builder,
+ LLVMGetParam(func, ctx.param_vertex_id),
+ LLVMGetParam(func, SI_SGPR_BASE_VERTEX), "");
+ }
+
+ index = LLVMBuildBitCast(gallivm->builder, index, ctx.f32, "");
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
+ num_params++, "");
+ }
+
+ /* Compile. */
+ LLVMBuildRet(gallivm->builder, ret);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Vertex Shader Prolog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
+ * Compile the vertex shader epilog. This is also used by the tessellation
+ * evaluation shader compiled as VS.
+ *
+ * The input is PrimitiveID.
+ *
+ * If PrimitiveID is required by the pixel shader, export it.
+ * Otherwise, do nothing.
+ */
+static bool si_compile_vs_epilog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
+ LLVMTypeRef params[5];
+ int num_params, i;
+ bool status = true;
+
+ si_init_shader_ctx(&ctx, sscreen, NULL, tm);
+ ctx.type = TGSI_PROCESSOR_VERTEX;
+
+ /* Declare input VGPRs. */
+ num_params = key->vs_epilog.states.export_prim_id ?
+ (VS_EPILOG_PRIMID_LOC + 1) : 0;
+ assert(num_params <= ARRAY_SIZE(params));
+
+ for (i = 0; i < num_params; i++)
+ params[i] = ctx.f32;
+
+ /* Create the function. */
+ si_create_function(&ctx, NULL, 0, params, num_params,
+ -1, -1);
+
+ /* Emit exports. */
+ if (key->vs_epilog.states.export_prim_id) {
+ struct lp_build_context *base = &bld_base->base;
+ struct lp_build_context *uint = &bld_base->uint_bld;
+ LLVMValueRef args[9];
+
+ args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
+ args[1] = uint->zero; /* whether the EXEC mask is valid */
+ args[2] = uint->zero; /* DONE bit */
+ args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_PARAM +
+ key->vs_epilog.prim_id_param_offset);
+ args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
+ args[5] = LLVMGetParam(ctx.radeon_bld.main_fn,
+ VS_EPILOG_PRIMID_LOC); /* X */
+ args[6] = uint->undef; /* Y */
+ args[7] = uint->undef; /* Z */
+ args[8] = uint->undef; /* W */
+
+ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9, 0);
+ }
+
+ /* Compile. */
+ LLVMBuildRet(gallivm->builder, ctx.return_value);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Vertex Shader Epilog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
+ * Create & compile a vertex shader epilog. This a helper used by VS and TES.
+ */
+static bool si_get_vs_epilog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug,
+ struct si_vs_epilog_bits *states)
+{
+ union si_shader_part_key epilog_key;
+
+ memset(&epilog_key, 0, sizeof(epilog_key));
+ epilog_key.vs_epilog.states = *states;
+
+ /* Set up the PrimitiveID output. */
+ if (shader->key.vs.epilog.export_prim_id) {
+ unsigned index = shader->selector->info.num_outputs;
+ unsigned offset = shader->info.nr_param_exports++;
+
+ epilog_key.vs_epilog.prim_id_param_offset = offset;
+ assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset));
+ shader->info.vs_output_param_offset[index] = offset;
+ }
+
+ shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs,
+ &epilog_key, tm, debug,
+ si_compile_vs_epilog);
+ return shader->epilog != NULL;
+}
+
+/**
+ * Select and compile (or reuse) vertex shader parts (prolog & epilog).
+ */
+static bool si_shader_select_vs_parts(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug)
+{
+ struct tgsi_shader_info *info = &shader->selector->info;
+ union si_shader_part_key prolog_key;
+ unsigned i;
+
+ /* Get the prolog. */
+ memset(&prolog_key, 0, sizeof(prolog_key));
+ prolog_key.vs_prolog.states = shader->key.vs.prolog;
+ prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+ prolog_key.vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
+
+ /* The prolog is a no-op if there are no inputs. */
+ if (info->num_inputs) {
+ shader->prolog =
+ si_get_shader_part(sscreen, &sscreen->vs_prologs,
+ &prolog_key, tm, debug,
+ si_compile_vs_prolog);
+ if (!shader->prolog)
+ return false;
+ }
+
+ /* Get the epilog. */
+ if (!shader->key.vs.as_es && !shader->key.vs.as_ls &&
+ !si_get_vs_epilog(sscreen, tm, shader, debug,
+ &shader->key.vs.epilog))
+ return false;
+
+ /* Set the instanceID flag. */
+ for (i = 0; i < info->num_inputs; i++)
+ if (prolog_key.vs_prolog.states.instance_divisors[i])
+ shader->info.uses_instanceid = true;
+
+ return true;
+}
+
+/**
+ * Select and compile (or reuse) TES parts (epilog).
+ */
+static bool si_shader_select_tes_parts(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug)
+{
+ if (shader->key.tes.as_es)
+ return true;
+
+ /* TES compiled as VS. */
+ return si_get_vs_epilog(sscreen, tm, shader, debug,
+ &shader->key.tes.epilog);
+}
+
+/**
+ * Compile the TCS epilog. This writes tesselation factors to memory based on
+ * the output primitive type of the tesselator (determined by TES).
+ */
+static bool si_compile_tcs_epilog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader shader = {};
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
+ LLVMTypeRef params[16];
+ LLVMValueRef func;
+ int last_array_pointer, last_sgpr, num_params;
+ bool status = true;
+
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ ctx.type = TGSI_PROCESSOR_TESS_CTRL;
+ shader.key.tcs.epilog = key->tcs_epilog.states;
+
+ /* Declare inputs. Only RW_BUFFERS and TESS_FACTOR_OFFSET are used. */
+ params[SI_PARAM_RW_BUFFERS] = const_array(ctx.v16i8, SI_NUM_RW_BUFFERS);
+ last_array_pointer = SI_PARAM_RW_BUFFERS;
+ params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
+ params[SI_PARAM_SAMPLERS] = ctx.i64;
+ params[SI_PARAM_UNUSED] = ctx.i64;
+ params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
+ params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
+ params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
+ params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx.i32;
+ last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
+ num_params = last_sgpr + 1;
+
+ params[num_params++] = ctx.i32; /* patch index within the wave (REL_PATCH_ID) */
+ params[num_params++] = ctx.i32; /* invocation ID within the patch */
+ params[num_params++] = ctx.i32; /* LDS offset where tess factors should be loaded from */
+
+ /* Create the function. */
+ si_create_function(&ctx, NULL, 0, params, num_params,
+ last_array_pointer, last_sgpr);
+ declare_tess_lds(&ctx);
+ func = ctx.radeon_bld.main_fn;
+
+ si_write_tess_factors(bld_base,
+ LLVMGetParam(func, last_sgpr + 1),
+ LLVMGetParam(func, last_sgpr + 2),
+ LLVMGetParam(func, last_sgpr + 3));
+
+ /* Compile. */
+ LLVMBuildRet(gallivm->builder, ctx.return_value);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Tessellation Control Shader Epilog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
+ * Select and compile (or reuse) TCS parts (epilog).
+ */
+static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug)
+{
+ union si_shader_part_key epilog_key;
+
+ /* Get the epilog. */
+ memset(&epilog_key, 0, sizeof(epilog_key));
+ epilog_key.tcs_epilog.states = shader->key.tcs.epilog;
+
+ shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs,
+ &epilog_key, tm, debug,
+ si_compile_tcs_epilog);
+ return shader->epilog != NULL;
+}
+
+/**
+ * Compile the pixel shader prolog. This handles:
+ * - two-side color selection and interpolation
+ * - overriding interpolation parameters for the API PS
+ * - polygon stippling
+ *
+ * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
+ * overriden by other states. (e.g. per-sample interpolation)
+ * Interpolated colors are stored after the preloaded VGPRs.
+ */
+static bool si_compile_ps_prolog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader shader = {};
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ LLVMTypeRef *params;
+ LLVMValueRef ret, func;
+ int last_sgpr, num_params, num_returns, i, num_color_channels;
+ bool status = true;
+
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ ctx.type = TGSI_PROCESSOR_FRAGMENT;
+ shader.key.ps.prolog = key->ps_prolog.states;
+
+ /* Number of inputs + 8 color elements. */
+ params = alloca((key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.num_input_vgprs + 8) *
+ sizeof(LLVMTypeRef));
+
+ /* Declare inputs. */
+ num_params = 0;
+ for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
+ params[num_params++] = ctx.i32;
+ last_sgpr = num_params - 1;
+
+ for (i = 0; i < key->ps_prolog.num_input_vgprs; i++)
+ params[num_params++] = ctx.f32;
+
+ /* Declare outputs (same as inputs + add colors if needed) */
+ num_returns = num_params;
+ num_color_channels = util_bitcount(key->ps_prolog.colors_read);
+ for (i = 0; i < num_color_channels; i++)
+ params[num_returns++] = ctx.f32;
+
+ /* Create the function. */
+ si_create_function(&ctx, params, num_returns, params,
+ num_params, -1, last_sgpr);
+ func = ctx.radeon_bld.main_fn;
+
+ /* Copy inputs to outputs. This should be no-op, as the registers match,
+ * but it will prevent the compiler from overwriting them unintentionally.
+ */
+ ret = ctx.return_value;
+ for (i = 0; i < num_params; i++) {
+ LLVMValueRef p = LLVMGetParam(func, i);
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+ }
+
+ /* Polygon stippling. */
+ if (key->ps_prolog.states.poly_stipple) {
+ /* POS_FIXED_PT is always last. */
+ unsigned pos = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.num_input_vgprs - 1;
+ LLVMValueRef ptr[2], views;
+
+ /* Get the pointer to sampler views. */
+ ptr[0] = LLVMGetParam(func, SI_SGPR_SAMPLERS);
+ ptr[1] = LLVMGetParam(func, SI_SGPR_SAMPLERS+1);
+ views = lp_build_gather_values(gallivm, ptr, 2);
+ views = LLVMBuildBitCast(gallivm->builder, views, ctx.i64, "");
+ views = LLVMBuildIntToPtr(gallivm->builder, views,
+ const_array(ctx.v8i32, SI_NUM_SAMPLERS), "");
+
+ si_llvm_emit_polygon_stipple(&ctx, views, pos);
+ }
+
+ /* Interpolate colors. */
+ for (i = 0; i < 2; i++) {
+ unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
+ unsigned face_vgpr = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.face_vgpr_index;
+ LLVMValueRef interp[2], color[4];
+ LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;
+
+ if (!writemask)
+ continue;
+
+ /* If the interpolation qualifier is not CONSTANT (-1). */
+ if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
+ unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.color_interp_vgpr_index[i];
+
+ interp[0] = LLVMGetParam(func, interp_vgpr);
+ interp[1] = LLVMGetParam(func, interp_vgpr + 1);
+ interp_ij = lp_build_gather_values(gallivm, interp, 2);
+ interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij,
+ ctx.v2i32, "");
+ }
+
+ /* Use the absolute location of the input. */
+ prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
+
+ if (key->ps_prolog.states.color_two_side) {
+ face = LLVMGetParam(func, face_vgpr);
+ face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, "");
+ }
+
+ interp_fs_input(&ctx,
+ key->ps_prolog.color_attr_index[i],
+ TGSI_SEMANTIC_COLOR, i,
+ key->ps_prolog.num_interp_inputs,
+ key->ps_prolog.colors_read, interp_ij,
+ prim_mask, face, color);
+
+ while (writemask) {
+ unsigned chan = u_bit_scan(&writemask);
+ ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
+ num_params++, "");
+ }
+ }
+
+ /* Force per-sample interpolation. */
+ if (key->ps_prolog.states.force_persample_interp) {
+ unsigned i, base = key->ps_prolog.num_input_sgprs;
+ LLVMValueRef persp_sample[2], linear_sample[2];
+
+ /* Read PERSP_SAMPLE. */
+ for (i = 0; i < 2; i++)
+ persp_sample[i] = LLVMGetParam(func, base + i);
+ /* Overwrite PERSP_CENTER. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ persp_sample[i], base + 2 + i, "");
+ /* Overwrite PERSP_CENTROID. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ persp_sample[i], base + 4 + i, "");
+ /* Read LINEAR_SAMPLE. */
+ for (i = 0; i < 2; i++)
+ linear_sample[i] = LLVMGetParam(func, base + 6 + i);
+ /* Overwrite LINEAR_CENTER. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ linear_sample[i], base + 8 + i, "");
+ /* Overwrite LINEAR_CENTROID. */
+ for (i = 0; i < 2; i++)
+ ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ linear_sample[i], base + 10 + i, "");
+ }
+
+ /* Compile. */
+ LLVMBuildRet(gallivm->builder, ret);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Fragment Shader Prolog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
+ * Compile the pixel shader epilog. This handles everything that must be
+ * emulated for pixel shader exports. (alpha-test, format conversions, etc)
+ */
+static bool si_compile_ps_epilog(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct pipe_debug_callback *debug,
+ struct si_shader_part *out)
+{
+ union si_shader_part_key *key = &out->key;
+ struct si_shader shader = {};
+ struct si_shader_context ctx;
+ struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+ struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
+ LLVMTypeRef params[16+8*4+3];
+ LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+ int last_array_pointer, last_sgpr, num_params, i;
+ bool status = true;
+
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ ctx.type = TGSI_PROCESSOR_FRAGMENT;
+ shader.key.ps.epilog = key->ps_epilog.states;
+
+ /* Declare input SGPRs. */
+ params[SI_PARAM_RW_BUFFERS] = ctx.i64;
+ params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
+ params[SI_PARAM_SAMPLERS] = ctx.i64;
+ params[SI_PARAM_UNUSED] = ctx.i64;
+ params[SI_PARAM_ALPHA_REF] = ctx.f32;
+ last_array_pointer = -1;
+ last_sgpr = SI_PARAM_ALPHA_REF;
+
+ /* Declare input VGPRs. */
+ num_params = (last_sgpr + 1) +
+ util_bitcount(key->ps_epilog.colors_written) * 4 +
+ key->ps_epilog.writes_z +
+ key->ps_epilog.writes_stencil +
+ key->ps_epilog.writes_samplemask;
+
+ num_params = MAX2(num_params,
+ last_sgpr + 1 + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
+
+ assert(num_params <= ARRAY_SIZE(params));
+
+ for (i = last_sgpr + 1; i < num_params; i++)
+ params[i] = ctx.f32;
+
+ /* Create the function. */
+ si_create_function(&ctx, NULL, 0, params, num_params,
+ last_array_pointer, last_sgpr);
+ /* Disable elimination of unused inputs. */
+ radeon_llvm_add_attribute(ctx.radeon_bld.main_fn,
+ "InitialPSInputAddr", 0xffffff);
+
+ /* Process colors. */
+ unsigned vgpr = last_sgpr + 1;
+ unsigned colors_written = key->ps_epilog.colors_written;
+ int last_color_export = -1;
+
+ /* Find the last color export. */
+ if (!key->ps_epilog.writes_z &&
+ !key->ps_epilog.writes_stencil &&
+ !key->ps_epilog.writes_samplemask) {
+ unsigned spi_format = key->ps_epilog.states.spi_shader_col_format;
+
+ /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
+ if (colors_written == 0x1 && key->ps_epilog.states.last_cbuf > 0) {
+ /* Just set this if any of the colorbuffers are enabled. */
+ if (spi_format &
+ ((1llu << (4 * (key->ps_epilog.states.last_cbuf + 1))) - 1))
+ last_color_export = 0;
+ } else {
+ for (i = 0; i < 8; i++)
+ if (colors_written & (1 << i) &&
+ (spi_format >> (i * 4)) & 0xf)
+ last_color_export = i;
+ }
+ }
+
+ while (colors_written) {
+ LLVMValueRef color[4];
+ int mrt = u_bit_scan(&colors_written);
+
+ for (i = 0; i < 4; i++)
+ color[i] = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+
+ si_export_mrt_color(bld_base, color, mrt,
+ num_params - 1,
+ mrt == last_color_export);
+ }
+
+ /* Process depth, stencil, samplemask. */
+ if (key->ps_epilog.writes_z)
+ depth = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+ if (key->ps_epilog.writes_stencil)
+ stencil = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+ if (key->ps_epilog.writes_samplemask)
+ samplemask = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
+
+ if (depth || stencil || samplemask)
+ si_export_mrt_z(bld_base, depth, stencil, samplemask);
+ else if (last_color_export == -1)
+ si_export_null(bld_base);
+
+ /* Compile. */
+ LLVMBuildRetVoid(gallivm->builder);
+ radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+ if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+ gallivm->module, debug, ctx.type,
+ "Fragment Shader Epilog"))
+ status = false;
+
+ radeon_llvm_dispose(&ctx.radeon_bld);
+ return status;
+}
+
+/**
+ * Select and compile (or reuse) pixel shader parts (prolog & epilog).
+ */
+static bool si_shader_select_ps_parts(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug)
+{
+ struct tgsi_shader_info *info = &shader->selector->info;
+ union si_shader_part_key prolog_key;
+ union si_shader_part_key epilog_key;
+ unsigned i;
+
+ /* Get the prolog. */
+ memset(&prolog_key, 0, sizeof(prolog_key));
+ prolog_key.ps_prolog.states = shader->key.ps.prolog;
+ prolog_key.ps_prolog.colors_read = info->colors_read;
+ prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+ prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
+
+ if (info->colors_read) {
+ unsigned *color = shader->selector->color_attr_index;
+
+ if (shader->key.ps.prolog.color_two_side) {
+ /* BCOLORs are stored after the last input. */
+ prolog_key.ps_prolog.num_interp_inputs = info->num_inputs;
+ prolog_key.ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
+ shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
+ }
+
+ for (i = 0; i < 2; i++) {
+ unsigned location = info->input_interpolate_loc[color[i]];
+
+ if (!(info->colors_read & (0xf << i*4)))
+ continue;
+
+ prolog_key.ps_prolog.color_attr_index[i] = color[i];
+
+ /* Force per-sample interpolation for the colors here. */
+ if (shader->key.ps.prolog.force_persample_interp)
+ location = TGSI_INTERPOLATE_LOC_SAMPLE;
+
+ switch (info->input_interpolate[color[i]]) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ case TGSI_INTERPOLATE_COLOR:
+ switch (location) {
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_SAMPLE_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_CENTER_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_CENTROID_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ switch (location) {
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_SAMPLE_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_CENTER_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_CENTROID_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ }
+ }
+
+ /* The prolog is a no-op if these aren't set. */
+ if (prolog_key.ps_prolog.colors_read ||
+ prolog_key.ps_prolog.states.force_persample_interp ||
+ prolog_key.ps_prolog.states.poly_stipple) {
+ shader->prolog =
+ si_get_shader_part(sscreen, &sscreen->ps_prologs,
+ &prolog_key, tm, debug,
+ si_compile_ps_prolog);
+ if (!shader->prolog)
+ return false;
+ }
+
+ /* Get the epilog. */
+ memset(&epilog_key, 0, sizeof(epilog_key));
+ epilog_key.ps_epilog.colors_written = info->colors_written;
+ epilog_key.ps_epilog.writes_z = info->writes_z;
+ epilog_key.ps_epilog.writes_stencil = info->writes_stencil;
+ epilog_key.ps_epilog.writes_samplemask = info->writes_samplemask;
+ epilog_key.ps_epilog.states = shader->key.ps.epilog;
+
+ shader->epilog =
+ si_get_shader_part(sscreen, &sscreen->ps_epilogs,
+ &epilog_key, tm, debug,
+ si_compile_ps_epilog);
+ if (!shader->epilog)
+ return false;
+
+ /* Enable POS_FIXED_PT if polygon stippling is enabled. */
+ if (shader->key.ps.prolog.poly_stipple) {
+ shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
+ assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr));
+ }
+
+ /* Set up the enable bits for per-sample shading if needed. */
+ if (shader->key.ps.prolog.force_persample_interp) {
+ if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena)) {
+ shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
+ shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+ shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
+ }
+ if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena)) {
+ shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
+ shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+ shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
+ }
+ }
+
+ /* POW_W_FLOAT requires that one of the perspective weights is enabled. */
+ if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
+ !(shader->config.spi_ps_input_ena & 0xf)) {
+ shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
+ assert(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr));
+ }
+
+ /* At least one pair of interpolation weights must be enabled. */
+ if (!(shader->config.spi_ps_input_ena & 0x7f)) {
+ shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
+ assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr));
+ }
+
+ /* The sample mask input is always enabled, because the API shader always
+ * passes it through to the epilog. Disable it here if it's unused.
+ */
+ if (!shader->key.ps.epilog.poly_line_smoothing &&
+ !shader->selector->info.reads_samplemask)
+ shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
+
+ return true;
+}
+
+int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug)
+{
+ struct si_shader *mainp = shader->selector->main_shader_part;
+ int r;
+
+ /* LS and ES are always compiled on demand. */
+ if (!mainp ||
+ (shader->selector->type == PIPE_SHADER_VERTEX &&
+ (shader->key.vs.as_es || shader->key.vs.as_ls)) ||
+ (shader->selector->type == PIPE_SHADER_TESS_EVAL &&
+ shader->key.tes.as_es)) {
+ /* Monolithic shader (compiled as a whole, has many variants,
+ * may take a long time to compile).
+ */
+ r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
+ if (r)
+ return r;
+ } else {
+ /* The shader consists of 2-3 parts:
+ *
+ * - the middle part is the user shader, it has 1 variant only
+ * and it was compiled during the creation of the shader
+ * selector
+ * - the prolog part is inserted at the beginning
+ * - the epilog part is inserted at the end
+ *
+ * The prolog and epilog have many (but simple) variants.
+ */
+
+ /* Copy the compiled TGSI shader data over. */
+ shader->is_binary_shared = true;
+ shader->binary = mainp->binary;
+ shader->config = mainp->config;
+ shader->info.num_input_sgprs = mainp->info.num_input_sgprs;
+ shader->info.num_input_vgprs = mainp->info.num_input_vgprs;
+ shader->info.face_vgpr_index = mainp->info.face_vgpr_index;
+ memcpy(shader->info.vs_output_param_offset,
+ mainp->info.vs_output_param_offset,
+ sizeof(mainp->info.vs_output_param_offset));
+ shader->info.uses_instanceid = mainp->info.uses_instanceid;
+ shader->info.nr_pos_exports = mainp->info.nr_pos_exports;
+ shader->info.nr_param_exports = mainp->info.nr_param_exports;
+
+ /* Select prologs and/or epilogs. */
+ switch (shader->selector->type) {
+ case PIPE_SHADER_VERTEX:
+ if (!si_shader_select_vs_parts(sscreen, tm, shader, debug))
+ return -1;
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ if (!si_shader_select_tcs_parts(sscreen, tm, shader, debug))
+ return -1;
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (!si_shader_select_tes_parts(sscreen, tm, shader, debug))
+ return -1;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ if (!si_shader_select_ps_parts(sscreen, tm, shader, debug))
+ return -1;
+
+ /* Make sure we have at least as many VGPRs as there
+ * are allocated inputs.
+ */
+ shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+ shader->info.num_input_vgprs);
+ break;
+ }
+
+ /* Update SGPR and VGPR counts. */
+ if (shader->prolog) {
+ shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
+ shader->prolog->config.num_sgprs);
+ shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+ shader->prolog->config.num_vgprs);
+ }
+ if (shader->epilog) {
+ shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
+ shader->epilog->config.num_sgprs);
+ shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+ shader->epilog->config.num_vgprs);
+ }
+ }
+
+ si_shader_dump(sscreen, shader, debug, shader->selector->info.processor);
+
+ /* Upload. */
+ r = si_shader_binary_upload(sscreen, shader);
+ if (r) {
+ fprintf(stderr, "LLVM failed to upload shader\n");
+ return r;
+ }
+
+ return 0;
+}
+
void si_shader_destroy(struct si_shader *shader)
{
if (shader->gs_copy_shader) {
@@ -4534,5 +5995,6 @@ void si_shader_destroy(struct si_shader *shader)
r600_resource_reference(&shader->bo, NULL);
- radeon_shader_binary_clean(&shader->binary);
+ if (!shader->is_binary_shared)
+ radeon_shader_binary_clean(&shader->binary);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index dc75e0330e4..ff5c24d8918 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -75,6 +75,8 @@
struct radeon_shader_binary;
struct radeon_shader_reloc;
+#define SI_MAX_VS_OUTPUTS 40
+
#define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */
#define SI_SGPR_CONST_BUFFERS 2
#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */
@@ -169,7 +171,7 @@ struct radeon_shader_reloc;
#define SI_PARAM_SAMPLE_COVERAGE 20
#define SI_PARAM_POS_FIXED_PT 21
-#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
+#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */
struct si_shader;
@@ -181,6 +183,11 @@ struct si_shader_selector {
struct si_shader *first_variant; /* immutable after the first variant */
struct si_shader *last_variant; /* mutable */
+ /* The compiled TGSI shader expecting a prolog and/or epilog (not
+ * uploaded to a buffer).
+ */
+ struct si_shader *main_shader_part;
+
struct tgsi_token *tokens;
struct pipe_stream_output_info so;
struct tgsi_shader_info info;
@@ -199,6 +206,7 @@ struct si_shader_selector {
unsigned max_gsvs_emit_size;
/* PS parameters. */
+ unsigned color_attr_index[2];
unsigned db_shader_control;
/* Set 0xf or 0x0 (4 bits) per each written output.
* ANDed with spi_shader_col_format.
@@ -221,37 +229,103 @@ struct si_shader_selector {
* With both: LS | HS | ES | GS | VS | PS
*/
+/* Common VS bits between the shader key and the prolog key. */
+struct si_vs_prolog_bits {
+ unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS];
+};
+
+/* Common VS bits between the shader key and the epilog key. */
+struct si_vs_epilog_bits {
+ unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
+ /* TODO:
+ * - skip clipdist, culldist (including clipvertex code) exports based
+ * on which clip_plane_enable bits are set
+ * - skip layer, viewport, clipdist, and culldist parameter exports
+ * if PS doesn't read them
+ */
+};
+
+/* Common TCS bits between the shader key and the epilog key. */
+struct si_tcs_epilog_bits {
+ unsigned prim_mode:3;
+};
+
+/* Common PS bits between the shader key and the prolog key. */
+struct si_ps_prolog_bits {
+ unsigned color_two_side:1;
+ /* TODO: add a flatshade bit that skips interpolation for colors */
+ unsigned poly_stipple:1;
+ unsigned force_persample_interp:1;
+ /* TODO:
+ * - add force_center_interp if MSAA is disabled and centroid or
+ * sample are present
+ * - add force_center_interp_bc_optimize to force center interpolation
+ * based on the bc_optimize SGPR bit if MSAA is enabled, centroid is
+ * present and sample isn't present.
+ */
+};
+
+/* Common PS bits between the shader key and the epilog key. */
+struct si_ps_epilog_bits {
+ unsigned spi_shader_col_format;
+ unsigned color_is_int8:8;
+ unsigned last_cbuf:3;
+ unsigned alpha_func:3;
+ unsigned alpha_to_one:1;
+ unsigned poly_line_smoothing:1;
+ unsigned clamp_color:1;
+};
+
+union si_shader_part_key {
+ struct {
+ struct si_vs_prolog_bits states;
+ unsigned num_input_sgprs:5;
+ unsigned last_input:4;
+ } vs_prolog;
+ struct {
+ struct si_vs_epilog_bits states;
+ unsigned prim_id_param_offset:5;
+ } vs_epilog;
+ struct {
+ struct si_tcs_epilog_bits states;
+ } tcs_epilog;
+ struct {
+ struct si_ps_prolog_bits states;
+ unsigned num_input_sgprs:5;
+ unsigned num_input_vgprs:5;
+ /* Color interpolation and two-side color selection. */
+ unsigned colors_read:8; /* color input components read */
+ unsigned num_interp_inputs:5; /* BCOLOR is at this location */
+ unsigned face_vgpr_index:5;
+ char color_attr_index[2];
+ char color_interp_vgpr_index[2]; /* -1 == constant */
+ } ps_prolog;
+ struct {
+ struct si_ps_epilog_bits states;
+ unsigned colors_written:8;
+ unsigned writes_z:1;
+ unsigned writes_stencil:1;
+ unsigned writes_samplemask:1;
+ } ps_epilog;
+};
+
union si_shader_key {
struct {
- unsigned spi_shader_col_format;
- unsigned color_is_int8:8;
- unsigned last_cbuf:3;
- unsigned color_two_side:1;
- unsigned alpha_func:3;
- unsigned alpha_to_one:1;
- unsigned poly_stipple:1;
- unsigned poly_line_smoothing:1;
- unsigned clamp_color:1;
- unsigned force_persample_interp:1;
+ struct si_ps_prolog_bits prolog;
+ struct si_ps_epilog_bits epilog;
} ps;
struct {
- unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS];
- /* Mask of "get_unique_index" bits - which outputs are read
- * by the next stage (needed by ES).
- * This describes how outputs are laid out in memory. */
+ struct si_vs_prolog_bits prolog;
+ struct si_vs_epilog_bits epilog;
unsigned as_es:1; /* export shader */
unsigned as_ls:1; /* local shader */
- unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
} vs;
struct {
- unsigned prim_mode:3;
+ struct si_tcs_epilog_bits epilog;
} tcs; /* tessellation control shader */
struct {
- /* Mask of "get_unique_index" bits - which outputs are read
- * by the next stage (needed by ES).
- * This describes how outputs are laid out in memory. */
+ struct si_vs_epilog_bits epilog; /* same as VS */
unsigned as_es:1; /* export shader */
- unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
} tes; /* tessellation evaluation shader */
};
@@ -267,22 +341,42 @@ struct si_shader_config {
unsigned rsrc2;
};
+/* GCN-specific shader info. */
+struct si_shader_info {
+ ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
+ ubyte num_input_sgprs;
+ ubyte num_input_vgprs;
+ char face_vgpr_index;
+ bool uses_instanceid;
+ ubyte nr_pos_exports;
+ ubyte nr_param_exports;
+};
+
struct si_shader {
struct si_shader_selector *selector;
struct si_shader *next_variant;
+ struct si_shader_part *prolog;
+ struct si_shader_part *epilog;
+
struct si_shader *gs_copy_shader;
struct si_pm4_state *pm4;
struct r600_resource *bo;
struct r600_resource *scratch_bo;
union si_shader_key key;
+ bool is_binary_shared;
+
+ /* The following data is all that's needed for binary shaders. */
struct radeon_shader_binary binary;
struct si_shader_config config;
+ struct si_shader_info info;
+};
- unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
- bool uses_instanceid;
- unsigned nr_pos_exports;
- unsigned nr_param_exports;
+struct si_shader_part {
+ struct si_shader_part *next;
+ union si_shader_part_key key;
+ struct radeon_shader_binary binary;
+ struct si_shader_config config;
};
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
@@ -310,14 +404,19 @@ static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
static inline bool si_vs_exports_prim_id(struct si_shader *shader)
{
if (shader->selector->type == PIPE_SHADER_VERTEX)
- return shader->key.vs.export_prim_id;
+ return shader->key.vs.epilog.export_prim_id;
else if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
- return shader->key.tes.export_prim_id;
+ return shader->key.tes.epilog.export_prim_id;
else
return false;
}
-/* radeonsi_shader.c */
+/* si_shader.c */
+int si_compile_tgsi_shader(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ bool is_monolithic,
+ struct pipe_debug_callback *debug);
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader,
struct pipe_debug_callback *debug);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index bf780777b50..2dfdbeb8d8f 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -277,7 +277,7 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a
if (sctx->b.family == CHIP_STONEY) {
unsigned spi_shader_col_format =
sctx->ps_shader.cso ?
- sctx->ps_shader.current->key.ps.spi_shader_col_format : 0;
+ sctx->ps_shader.current->key.ps.epilog.spi_shader_col_format : 0;
unsigned sx_ps_downconvert = 0;
unsigned sx_blend_opt_epsilon = 0;
unsigned sx_blend_opt_control = 0;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f64c4d45f1b..40792cbc1d5 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
/* si_state_shader.c */
bool si_update_shaders(struct si_context *sctx);
void si_init_shader_functions(struct si_context *sctx);
+bool si_init_shader_cache(struct si_screen *sscreen);
+void si_destroy_shader_cache(struct si_screen *sscreen);
/* si_state_draw.c */
void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 77a4e47c809..a6753a7a528 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -32,10 +32,221 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
+#include "util/hash_table.h"
+#include "util/u_hash.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_simple_shaders.h"
+/* SHADER_CACHE */
+
+/**
+ * Return the TGSI binary in a buffer. The first 4 bytes contain its size as
+ * integer.
+ */
+static void *si_get_tgsi_binary(struct si_shader_selector *sel)
+{
+ unsigned tgsi_size = tgsi_num_tokens(sel->tokens) *
+ sizeof(struct tgsi_token);
+ unsigned size = 4 + tgsi_size + sizeof(sel->so);
+ char *result = (char*)MALLOC(size);
+
+ if (!result)
+ return NULL;
+
+ *((uint32_t*)result) = size;
+ memcpy(result + 4, sel->tokens, tgsi_size);
+ memcpy(result + 4 + tgsi_size, &sel->so, sizeof(sel->so));
+ return result;
+}
+
+/** Copy "data" to "ptr" and return the next dword following copied data. */
+static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned size)
+{
+ memcpy(ptr, data, size);
+ ptr += DIV_ROUND_UP(size, 4);
+ return ptr;
+}
+
+/** Read data from "ptr". Return the next dword following the data. */
+static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size)
+{
+ memcpy(data, ptr, size);
+ ptr += DIV_ROUND_UP(size, 4);
+ return ptr;
+}
+
+/**
+ * Write the size as uint followed by the data. Return the next dword
+ * following the copied data.
+ */
+static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned size)
+{
+ *ptr++ = size;
+ return write_data(ptr, data, size);
+}
+
+/**
+ * Read the size as uint followed by the data. Return both via parameters.
+ * Return the next dword following the data.
+ */
+static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
+{
+ *size = *ptr++;
+ assert(*data == NULL);
+ *data = malloc(*size);
+ return read_data(ptr, *data, *size);
+}
+
+/**
+ * Return the shader binary in a buffer. The first 4 bytes contain its size
+ * as integer.
+ */
+static void *si_get_shader_binary(struct si_shader *shader)
+{
+ /* There is always a size of data followed by the data itself. */
+ unsigned relocs_size = shader->binary.reloc_count *
+ sizeof(shader->binary.relocs[0]);
+ unsigned disasm_size = strlen(shader->binary.disasm_string) + 1;
+ unsigned size =
+ 4 + /* total size */
+ 4 + /* CRC32 of the data below */
+ align(sizeof(shader->config), 4) +
+ align(sizeof(shader->info), 4) +
+ 4 + align(shader->binary.code_size, 4) +
+ 4 + align(shader->binary.rodata_size, 4) +
+ 4 + align(relocs_size, 4) +
+ 4 + align(disasm_size, 4);
+ void *buffer = CALLOC(1, size);
+ uint32_t *ptr = (uint32_t*)buffer;
+
+ if (!buffer)
+ return NULL;
+
+ *ptr++ = size;
+ ptr++; /* CRC32 is calculated at the end. */
+
+ ptr = write_data(ptr, &shader->config, sizeof(shader->config));
+ ptr = write_data(ptr, &shader->info, sizeof(shader->info));
+ ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size);
+ ptr = write_chunk(ptr, shader->binary.rodata, shader->binary.rodata_size);
+ ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
+ ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);
+ assert((char *)ptr - (char *)buffer == size);
+
+ /* Compute CRC32. */
+ ptr = (uint32_t*)buffer;
+ ptr++;
+ *ptr = util_hash_crc32(ptr + 1, size - 8);
+
+ return buffer;
+}
+
+static bool si_load_shader_binary(struct si_shader *shader, void *binary)
+{
+ uint32_t *ptr = (uint32_t*)binary;
+ uint32_t size = *ptr++;
+ uint32_t crc32 = *ptr++;
+ unsigned chunk_size;
+
+ if (util_hash_crc32(ptr, size - 8) != crc32) {
+ fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n");
+ return false;
+ }
+
+ ptr = read_data(ptr, &shader->config, sizeof(shader->config));
+ ptr = read_data(ptr, &shader->info, sizeof(shader->info));
+ ptr = read_chunk(ptr, (void**)&shader->binary.code,
+ &shader->binary.code_size);
+ ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
+ &shader->binary.rodata_size);
+ ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &chunk_size);
+ shader->binary.reloc_count = chunk_size / sizeof(shader->binary.relocs[0]);
+ ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, &chunk_size);
+
+ return true;
+}
+
+/**
+ * Insert a shader into the cache. It's assumed the shader is not in the cache.
+ * Use si_shader_cache_load_shader before calling this.
+ *
+ * Returns false on failure, in which case the tgsi_binary should be freed.
+ */
+static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
+ void *tgsi_binary,
+ struct si_shader *shader)
+{
+ void *hw_binary = si_get_shader_binary(shader);
+
+ if (!hw_binary)
+ return false;
+
+ if (_mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary,
+ hw_binary) == NULL) {
+ FREE(hw_binary);
+ return false;
+ }
+
+ return true;
+}
+
+static bool si_shader_cache_load_shader(struct si_screen *sscreen,
+ void *tgsi_binary,
+ struct si_shader *shader)
+{
+ struct hash_entry *entry =
+ _mesa_hash_table_search(sscreen->shader_cache, tgsi_binary);
+ if (!entry)
+ return false;
+
+ return si_load_shader_binary(shader, entry->data);
+}
+
+static uint32_t si_shader_cache_key_hash(const void *key)
+{
+ /* The first dword is the key size. */
+ return util_hash_crc32(key, *(uint32_t*)key);
+}
+
+static bool si_shader_cache_key_equals(const void *a, const void *b)
+{
+ uint32_t *keya = (uint32_t*)a;
+ uint32_t *keyb = (uint32_t*)b;
+
+ /* The first dword is the key size. */
+ if (*keya != *keyb)
+ return false;
+
+ return memcmp(keya, keyb, *keya) == 0;
+}
+
+static void si_destroy_shader_cache_entry(struct hash_entry *entry)
+{
+ FREE((void*)entry->key);
+ FREE(entry->data);
+}
+
+bool si_init_shader_cache(struct si_screen *sscreen)
+{
+ pipe_mutex_init(sscreen->shader_cache_mutex);
+ sscreen->shader_cache =
+ _mesa_hash_table_create(NULL,
+ si_shader_cache_key_hash,
+ si_shader_cache_key_equals);
+ return sscreen->shader_cache != NULL;
+}
+
+void si_destroy_shader_cache(struct si_screen *sscreen)
+{
+ if (sscreen->shader_cache)
+ _mesa_hash_table_destroy(sscreen->shader_cache,
+ si_destroy_shader_cache_entry);
+ pipe_mutex_destroy(sscreen->shader_cache_mutex);
+}
+
+/* SHADER STATES */
+
static void si_set_tesseval_regs(struct si_shader *shader,
struct si_pm4_state *pm4)
{
@@ -108,7 +319,7 @@ static void si_shader_ls(struct si_shader *shader)
/* We need at least 2 components for LS.
* VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1;
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
num_user_sgprs = SI_LS_NUM_USER_SGPR;
num_sgprs = shader->config.num_sgprs;
@@ -181,7 +392,7 @@ static void si_shader_es(struct si_shader *shader)
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
num_user_sgprs = SI_ES_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = 3; /* all components are needed for TES */
@@ -347,7 +558,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
num_user_sgprs = SI_VS_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = 3; /* all components are needed for TES */
@@ -363,19 +574,19 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
assert(num_sgprs <= 104);
/* VS is required to export at least one param. */
- nparams = MAX2(shader->nr_param_exports, 1);
+ nparams = MAX2(shader->info.nr_param_exports, 1);
si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
S_0286C4_VS_EXPORT_COUNT(nparams - 1));
si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(shader->nr_pos_exports > 1 ?
+ S_02870C_POS1_EXPORT_FORMAT(shader->info.nr_pos_exports > 1 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(shader->nr_pos_exports > 2 ?
+ S_02870C_POS2_EXPORT_FORMAT(shader->info.nr_pos_exports > 2 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(shader->nr_pos_exports > 3 ?
+ S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE));
@@ -415,7 +626,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps)
unsigned num_colors = !!(info->colors_read & 0x0f) +
!!(info->colors_read & 0xf0);
unsigned num_interp = ps->selector->info.num_inputs +
- (ps->key.ps.color_two_side ? num_colors : 0);
+ (ps->key.ps.prolog.color_two_side ? num_colors : 0);
assert(num_interp <= 32);
return MIN2(num_interp, 32);
@@ -423,7 +634,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps)
static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
{
- unsigned value = shader->key.ps.spi_shader_col_format;
+ unsigned value = shader->key.ps.epilog.spi_shader_col_format;
unsigned i, num_targets = (util_last_bit(value) + 3) / 4;
/* If the i-th target format is set, all previous target formats must
@@ -528,7 +739,7 @@ static void si_shader_ps(struct si_shader *shader)
if (!spi_shader_col_format &&
!info->writes_z && !info->writes_stencil && !info->writes_samplemask &&
(shader->selector->info.uses_kill ||
- shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
+ shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS))
spi_shader_col_format = V_028714_SPI_SHADER_32_R;
si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
@@ -638,11 +849,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
switch (sel->type) {
case PIPE_SHADER_VERTEX:
- if (sctx->vertex_elements)
- for (i = 0; i < sctx->vertex_elements->count; ++i)
- key->vs.instance_divisors[i] =
+ if (sctx->vertex_elements) {
+ unsigned count = MIN2(sel->info.num_inputs,
+ sctx->vertex_elements->count);
+ for (i = 0; i < count; ++i)
+ key->vs.prolog.instance_divisors[i] =
sctx->vertex_elements->elements[i].instance_divisor;
-
+ }
if (sctx->tes_shader.cso)
key->vs.as_ls = 1;
else if (sctx->gs_shader.cso)
@@ -650,17 +863,17 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
sctx->ps_shader.cso->info.uses_primid)
- key->vs.export_prim_id = 1;
+ key->vs.epilog.export_prim_id = 1;
break;
case PIPE_SHADER_TESS_CTRL:
- key->tcs.prim_mode =
+ key->tcs.epilog.prim_mode =
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
break;
case PIPE_SHADER_TESS_EVAL:
if (sctx->gs_shader.cso)
key->tes.as_es = 1;
else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
- key->tes.export_prim_id = 1;
+ key->tes.epilog.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
break;
@@ -670,13 +883,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
sel->info.colors_written == 0x1)
- key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
+ key->ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
if (blend) {
/* Select the shader color format based on whether
* blending or alpha are needed.
*/
- key->ps.spi_shader_col_format =
+ key->ps.epilog.spi_shader_col_format =
(blend->blend_enable_4bit & blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format_blend_alpha) |
(blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
@@ -686,26 +899,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
(~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format);
} else
- key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
+ key->ps.epilog.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
/* If alpha-to-coverage is enabled, we have to export alpha
* even if there is no color buffer.
*/
- if (!(key->ps.spi_shader_col_format & 0xf) &&
+ if (!(key->ps.epilog.spi_shader_col_format & 0xf) &&
blend && blend->alpha_to_coverage)
- key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
+ key->ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
/* On SI and CIK except Hawaii, the CB doesn't clamp outputs
* to the range supported by the type if a channel has less
* than 16 bits and the export format is 16_ABGR.
*/
if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
- key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;
+ key->ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
/* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
- if (!key->ps.last_cbuf) {
- key->ps.spi_shader_col_format &= sel->colors_written_4bit;
- key->ps.color_is_int8 &= sel->info.colors_written;
+ if (!key->ps.epilog.last_cbuf) {
+ key->ps.epilog.spi_shader_col_format &= sel->colors_written_4bit;
+ key->ps.epilog.color_is_int8 &= sel->info.colors_written;
}
if (rs) {
@@ -714,31 +927,32 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY;
bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS;
- key->ps.color_two_side = rs->two_side && sel->info.colors_read;
+ key->ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
if (sctx->queued.named.blend) {
- key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
- rs->multisample_enable &&
- !sctx->framebuffer.cb0_is_integer;
+ key->ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
+ rs->multisample_enable &&
+ !sctx->framebuffer.cb0_is_integer;
}
- key->ps.poly_stipple = rs->poly_stipple_enable && is_poly;
- key->ps.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
- (is_line && rs->line_smooth)) &&
- sctx->framebuffer.nr_samples <= 1;
- key->ps.clamp_color = rs->clamp_fragment_color;
-
- key->ps.force_persample_interp = rs->force_persample_interp &&
- rs->multisample_enable &&
- sctx->framebuffer.nr_samples > 1 &&
- sctx->ps_iter_samples > 1 &&
- (sel->info.uses_persp_center ||
- sel->info.uses_persp_centroid ||
- sel->info.uses_linear_center ||
- sel->info.uses_linear_centroid);
+ key->ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
+ key->ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
+ (is_line && rs->line_smooth)) &&
+ sctx->framebuffer.nr_samples <= 1;
+ key->ps.epilog.clamp_color = rs->clamp_fragment_color;
+
+ key->ps.prolog.force_persample_interp =
+ rs->force_persample_interp &&
+ rs->multisample_enable &&
+ sctx->framebuffer.nr_samples > 1 &&
+ sctx->ps_iter_samples > 1 &&
+ (sel->info.uses_persp_center ||
+ sel->info.uses_persp_centroid ||
+ sel->info.uses_linear_center ||
+ sel->info.uses_linear_centroid);
}
- key->ps.alpha_func = si_get_alpha_test_func(sctx);
+ key->ps.epilog.alpha_func = si_get_alpha_test_func(sctx);
break;
}
default:
@@ -821,6 +1035,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
struct si_screen *sscreen = (struct si_screen *)ctx->screen;
+ struct si_context *sctx = (struct si_context*)ctx;
struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
int i;
@@ -900,6 +1115,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
for (i = 0; i < 8; i++)
if (sel->info.colors_written & (1 << i))
sel->colors_written_4bit |= 0xf << (4 * i);
+
+ for (i = 0; i < sel->info.num_inputs; i++) {
+ if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ int index = sel->info.input_semantic_index[i];
+ sel->color_attr_index[index] = i;
+ }
+ }
break;
}
@@ -921,6 +1143,44 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
break;
}
+ /* Compile the main shader part for use with a prolog and/or epilog. */
+ if (sel->type != PIPE_SHADER_GEOMETRY &&
+ !sscreen->use_monolithic_shaders) {
+ struct si_shader *shader = CALLOC_STRUCT(si_shader);
+ void *tgsi_binary;
+
+ if (!shader)
+ goto error;
+
+ shader->selector = sel;
+
+ tgsi_binary = si_get_tgsi_binary(sel);
+
+ /* Try to load the shader from the shader cache. */
+ pipe_mutex_lock(sscreen->shader_cache_mutex);
+
+ if (tgsi_binary &&
+ si_shader_cache_load_shader(sscreen, tgsi_binary, shader)) {
+ FREE(tgsi_binary);
+ } else {
+ /* Compile the shader if it hasn't been loaded from the cache. */
+ if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, false,
+ &sctx->b.debug) != 0) {
+ FREE(shader);
+ FREE(tgsi_binary);
+ pipe_mutex_unlock(sscreen->shader_cache_mutex);
+ goto error;
+ }
+
+ if (tgsi_binary &&
+ !si_shader_cache_insert_shader(sscreen, tgsi_binary, shader))
+ FREE(tgsi_binary);
+ }
+ pipe_mutex_unlock(sscreen->shader_cache_mutex);
+
+ sel->main_shader_part = shader;
+ }
+
/* Pre-compilation. */
if (sel->type == PIPE_SHADER_GEOMETRY ||
sscreen->b.debug_flags & DBG_PRECOMPILE) {
@@ -934,27 +1194,29 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
*/
switch (sel->type) {
case PIPE_SHADER_TESS_CTRL:
- key.tcs.prim_mode = PIPE_PRIM_TRIANGLES;
+ key.tcs.epilog.prim_mode = PIPE_PRIM_TRIANGLES;
break;
case PIPE_SHADER_FRAGMENT:
- key.ps.alpha_func = PIPE_FUNC_ALWAYS;
+ key.ps.epilog.alpha_func = PIPE_FUNC_ALWAYS;
for (i = 0; i < 8; i++)
if (sel->info.colors_written & (1 << i))
- key.ps.spi_shader_col_format |=
+ key.ps.epilog.spi_shader_col_format |=
V_028710_SPI_SHADER_FP16_ABGR << (i * 4);
break;
}
- if (si_shader_select_with_key(ctx, &state, &key)) {
- fprintf(stderr, "radeonsi: can't create a shader\n");
- tgsi_free_tokens(sel->tokens);
- FREE(sel);
- return NULL;
- }
+ if (si_shader_select_with_key(ctx, &state, &key))
+ goto error;
}
pipe_mutex_init(sel->mutex);
return sel;
+
+error:
+ fprintf(stderr, "radeonsi: can't create a shader\n");
+ tgsi_free_tokens(sel->tokens);
+ FREE(sel);
+ return NULL;
}
/**
@@ -1119,6 +1381,9 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
p = c;
}
+ if (sel->main_shader_part)
+ si_delete_shader(sctx, sel->main_shader_part);
+
pipe_mutex_destroy(sel->mutex);
free(sel->tokens);
free(sel);
@@ -1144,14 +1409,14 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
for (j = 0; j < vsinfo->num_outputs; j++) {
if (name == vsinfo->output_semantic_name[j] &&
index == vsinfo->output_semantic_index[j]) {
- ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[j]);
+ ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[j]);
break;
}
}
if (name == TGSI_SEMANTIC_PRIMID)
/* PrimID is written after the last output. */
- ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
+ ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]);
else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
/* No corresponding output found, load defaults into input.
* Don't set any other bits.
@@ -1191,7 +1456,7 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
}
}
- if (ps->key.ps.color_two_side) {
+ if (ps->key.ps.prolog.color_two_side) {
unsigned bcol = TGSI_SEMANTIC_BCOLOR;
for (i = 0; i < 2; i++) {
@@ -1745,8 +2010,8 @@ bool si_update_shaders(struct si_context *sctx)
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
- if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
- sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
+ if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.epilog.poly_line_smoothing) {
+ sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.epilog.poly_line_smoothing;
si_mark_atom_dirty(sctx, &sctx->msaa_config);
if (sctx->b.chip_class == SI)
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 80526ed4d15..fe6cf71a6e5 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -590,6 +590,16 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
}
else {
/* non-indexed drawing */
+ if (svga->state.hw_draw.ib_format != SVGA3D_FORMAT_INVALID) {
+ /* Unbind previously bound index buffer */
+ ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, NULL,
+ SVGA3D_FORMAT_INVALID, 0);
+ if (ret != PIPE_OK)
+ return ret;
+ svga->state.hw_draw.ib_format = SVGA3D_FORMAT_INVALID;
+ svga->state.hw_draw.ib = NULL;
+ }
+
if (instance_count > 1) {
ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
vcount,
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index d5405f8eacf..c9abd49ec1e 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -468,12 +468,15 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -527,12 +530,15 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
return 0;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -619,12 +625,15 @@ vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader,
return SVGA3D_DX_MAX_SAMPLERS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 1223e446055..0c5afeb4cf9 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -1782,7 +1782,7 @@ alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
{
- const unsigned n = emit->info.num_inputs + index;
+ const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index;
assert(index < Elements(emit->system_value_indexes));
emit->system_value_indexes[index] = n;
return n;
@@ -2446,7 +2446,7 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit)
else {
assert(emit->unit == PIPE_SHADER_VERTEX);
- for (i = 0; i < emit->info.num_inputs; i++) {
+ for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
unsigned usage_mask = emit->info.input_usage_mask[i];
unsigned index = i;
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 2ce2b3aef75..57f851833e5 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -50,7 +50,8 @@ struct trace_query
static inline struct trace_query *
-trace_query(struct pipe_query *query) {
+trace_query(struct pipe_query *query)
+{
return (struct trace_query *)query;
}
@@ -93,7 +94,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
return NULL;
assert(surface->texture);
- if(!surface->texture)
+ if (!surface->texture)
return surface;
tr_surf = trace_surface(surface);
@@ -105,7 +106,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
}
-static inline void
+static void
trace_context_draw_vbo(struct pipe_context *_pipe,
const struct pipe_draw_info *info)
{
@@ -135,7 +136,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe,
}
-static inline struct pipe_query *
+static struct pipe_query *
trace_context_create_query(struct pipe_context *_pipe,
unsigned query_type,
unsigned index)
@@ -173,7 +174,7 @@ trace_context_create_query(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_destroy_query(struct pipe_context *_pipe,
struct pipe_query *_query)
{
@@ -195,7 +196,7 @@ trace_context_destroy_query(struct pipe_context *_pipe,
}
-static inline boolean
+static boolean
trace_context_begin_query(struct pipe_context *_pipe,
struct pipe_query *query)
{
@@ -217,7 +218,7 @@ trace_context_begin_query(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_end_query(struct pipe_context *_pipe,
struct pipe_query *query)
{
@@ -237,7 +238,7 @@ trace_context_end_query(struct pipe_context *_pipe,
}
-static inline boolean
+static boolean
trace_context_get_query_result(struct pipe_context *_pipe,
struct pipe_query *_query,
boolean wait,
@@ -272,7 +273,7 @@ trace_context_get_query_result(struct pipe_context *_pipe,
}
-static inline void *
+static void *
trace_context_create_blend_state(struct pipe_context *_pipe,
const struct pipe_blend_state *state)
{
@@ -295,7 +296,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_bind_blend_state(struct pipe_context *_pipe,
void *state)
{
@@ -313,7 +314,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_delete_blend_state(struct pipe_context *_pipe,
void *state)
{
@@ -331,7 +332,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe,
}
-static inline void *
+static void *
trace_context_create_sampler_state(struct pipe_context *_pipe,
const struct pipe_sampler_state *state)
{
@@ -354,7 +355,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_bind_sampler_states(struct pipe_context *_pipe,
unsigned shader,
unsigned start,
@@ -381,7 +382,7 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_delete_sampler_state(struct pipe_context *_pipe,
void *state)
{
@@ -399,7 +400,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe,
}
-static inline void *
+static void *
trace_context_create_rasterizer_state(struct pipe_context *_pipe,
const struct pipe_rasterizer_state *state)
{
@@ -422,7 +423,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
void *state)
{
@@ -440,7 +441,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
void *state)
{
@@ -458,7 +459,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
}
-static inline void *
+static void *
trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
const struct pipe_depth_stencil_alpha_state *state)
{
@@ -481,7 +482,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
void *state)
{
@@ -499,7 +500,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
void *state)
{
@@ -518,7 +519,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
#define TRACE_SHADER_STATE(shader_type) \
- static inline void * \
+ static void * \
trace_context_create_##shader_type##_state(struct pipe_context *_pipe, \
const struct pipe_shader_state *state) \
{ \
@@ -534,7 +535,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
return result; \
} \
\
- static inline void \
+ static void \
trace_context_bind_##shader_type##_state(struct pipe_context *_pipe, \
void *state) \
{ \
@@ -547,7 +548,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
trace_dump_call_end(); \
} \
\
- static inline void \
+ static void \
trace_context_delete_##shader_type##_state(struct pipe_context *_pipe, \
void *state) \
{ \
@@ -570,6 +571,51 @@ TRACE_SHADER_STATE(tes)
static inline void *
+trace_context_create_compute_state(struct pipe_context *_pipe,
+ const struct pipe_compute_state *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+ void * result;
+
+ trace_dump_call_begin("pipe_context", "create_compute_state");
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(compute_state, state);
+ result = pipe->create_compute_state(pipe, state);
+ trace_dump_ret(ptr, result);
+ trace_dump_call_end();
+ return result;
+}
+
+static inline void
+trace_context_bind_compute_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "bind_compute_state");
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+ pipe->bind_compute_state(pipe, state);
+ trace_dump_call_end();
+}
+
+static inline void
+trace_context_delete_compute_state(struct pipe_context *_pipe,
+ void *state)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "delete_compute_state");
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(ptr, state);
+ pipe->delete_compute_state(pipe, state);
+ trace_dump_call_end();
+}
+
+static void *
trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
unsigned num_elements,
const struct pipe_vertex_element *elements)
@@ -597,7 +643,7 @@ trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
void *state)
{
@@ -615,7 +661,7 @@ trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
void *state)
{
@@ -633,7 +679,7 @@ trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_blend_color(struct pipe_context *_pipe,
const struct pipe_blend_color *state)
{
@@ -651,7 +697,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_stencil_ref(struct pipe_context *_pipe,
const struct pipe_stencil_ref *state)
{
@@ -669,7 +715,7 @@ trace_context_set_stencil_ref(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_clip_state(struct pipe_context *_pipe,
const struct pipe_clip_state *state)
{
@@ -686,7 +732,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static inline void
+static void
trace_context_set_sample_mask(struct pipe_context *_pipe,
unsigned sample_mask)
{
@@ -703,7 +749,7 @@ trace_context_set_sample_mask(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static inline void
+static void
trace_context_set_constant_buffer(struct pipe_context *_pipe,
uint shader, uint index,
struct pipe_constant_buffer *constant_buffer)
@@ -731,7 +777,7 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_framebuffer_state(struct pipe_context *_pipe,
const struct pipe_framebuffer_state *state)
{
@@ -743,9 +789,9 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe,
/* Unwrap the input state */
memcpy(&unwrapped_state, state, sizeof(unwrapped_state));
- for(i = 0; i < state->nr_cbufs; ++i)
+ for (i = 0; i < state->nr_cbufs; ++i)
unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]);
- for(i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i)
+ for (i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i)
unwrapped_state.cbufs[i] = NULL;
unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf);
state = &unwrapped_state;
@@ -761,7 +807,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_polygon_stipple(struct pipe_context *_pipe,
const struct pipe_poly_stipple *state)
{
@@ -779,7 +825,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_scissor_states(struct pipe_context *_pipe,
unsigned start_slot,
unsigned num_scissors,
@@ -801,7 +847,7 @@ trace_context_set_scissor_states(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_viewport_states(struct pipe_context *_pipe,
unsigned start_slot,
unsigned num_viewports,
@@ -825,8 +871,8 @@ trace_context_set_viewport_states(struct pipe_context *_pipe,
static struct pipe_sampler_view *
trace_context_create_sampler_view(struct pipe_context *_pipe,
- struct pipe_resource *_resource,
- const struct pipe_sampler_view *templ)
+ struct pipe_resource *_resource,
+ const struct pipe_sampler_view *templ)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct trace_resource *tr_res = trace_resource(_resource);
@@ -868,7 +914,7 @@ trace_context_create_sampler_view(struct pipe_context *_pipe,
static void
trace_context_sampler_view_destroy(struct pipe_context *_pipe,
- struct pipe_sampler_view *_view)
+ struct pipe_sampler_view *_view)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct trace_sampler_view *tr_view = trace_sampler_view(_view);
@@ -910,7 +956,7 @@ trace_context_create_surface(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, resource);
-
+
trace_dump_arg_begin("surf_tmpl");
trace_dump_surface_template(surf_tmpl, resource->target);
trace_dump_arg_end();
@@ -948,7 +994,7 @@ trace_context_surface_destroy(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_sampler_views(struct pipe_context *_pipe,
unsigned shader,
unsigned start,
@@ -964,7 +1010,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe,
/* remove this when we have pipe->set_sampler_views(..., start, ...) */
assert(start == 0);
- for(i = 0; i < num; ++i) {
+ for (i = 0; i < num; ++i) {
tr_view = trace_sampler_view(views[i]);
unwrapped_views[i] = tr_view ? tr_view->sampler_view : NULL;
}
@@ -984,7 +1030,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_vertex_buffers(struct pipe_context *_pipe,
unsigned start_slot, unsigned num_buffers,
const struct pipe_vertex_buffer *buffers)
@@ -1018,7 +1064,7 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_set_index_buffer(struct pipe_context *_pipe,
const struct pipe_index_buffer *ib)
{
@@ -1043,7 +1089,7 @@ trace_context_set_index_buffer(struct pipe_context *_pipe,
}
-static inline struct pipe_stream_output_target *
+static struct pipe_stream_output_target *
trace_context_create_stream_output_target(struct pipe_context *_pipe,
struct pipe_resource *res,
unsigned buffer_offset,
@@ -1073,7 +1119,7 @@ trace_context_create_stream_output_target(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_stream_output_target_destroy(
struct pipe_context *_pipe,
struct pipe_stream_output_target *target)
@@ -1092,7 +1138,7 @@ trace_context_stream_output_target_destroy(
}
-static inline void
+static void
trace_context_set_stream_output_targets(struct pipe_context *_pipe,
unsigned num_targets,
struct pipe_stream_output_target **tgs,
@@ -1114,7 +1160,7 @@ trace_context_set_stream_output_targets(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_resource_copy_region(struct pipe_context *_pipe,
struct pipe_resource *dst,
unsigned dst_level,
@@ -1149,7 +1195,7 @@ trace_context_resource_copy_region(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_blit(struct pipe_context *_pipe,
const struct pipe_blit_info *_info)
{
@@ -1191,7 +1237,7 @@ trace_context_flush_resource(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_clear(struct pipe_context *_pipe,
unsigned buffers,
const union pipe_color_union *color,
@@ -1220,7 +1266,7 @@ trace_context_clear(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_clear_render_target(struct pipe_context *_pipe,
struct pipe_surface *dst,
const union pipe_color_union *color,
@@ -1247,7 +1293,7 @@ trace_context_clear_render_target(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static inline void
+static void
trace_context_clear_depth_stencil(struct pipe_context *_pipe,
struct pipe_surface *dst,
unsigned clear_flags,
@@ -1306,7 +1352,7 @@ trace_context_clear_texture(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static inline void
+static void
trace_context_flush(struct pipe_context *_pipe,
struct pipe_fence_handle **fence,
unsigned flags)
@@ -1364,7 +1410,7 @@ trace_context_generate_mipmap(struct pipe_context *_pipe,
}
-static inline void
+static void
trace_context_destroy(struct pipe_context *_pipe)
{
struct trace_context *tr_ctx = trace_context(_pipe);
@@ -1414,7 +1460,7 @@ trace_context_transfer_map(struct pipe_context *_context,
*transfer = trace_transfer_create(tr_context, tr_res, result);
if (map) {
- if(usage & PIPE_TRANSFER_WRITE) {
+ if (usage & PIPE_TRANSFER_WRITE) {
trace_transfer(*transfer)->map = map;
}
}
@@ -1432,9 +1478,7 @@ trace_context_transfer_flush_region( struct pipe_context *_context,
struct pipe_context *context = tr_context->pipe;
struct pipe_transfer *transfer = tr_transfer->transfer;
- context->transfer_flush_region(context,
- transfer,
- box);
+ context->transfer_flush_region(context, transfer, box);
}
static void
@@ -1446,7 +1490,7 @@ trace_context_transfer_unmap(struct pipe_context *_context,
struct pipe_context *context = tr_ctx->pipe;
struct pipe_transfer *transfer = tr_trans->transfer;
- if(tr_trans->map) {
+ if (tr_trans->map) {
/*
* Fake a transfer_inline_write
*/
@@ -1525,15 +1569,16 @@ trace_context_transfer_inline_write(struct pipe_context *_context,
trace_dump_call_end();
- context->transfer_inline_write(context, resource,
- level, usage, box, data, stride, layer_stride);
+ context->transfer_inline_write(context, resource, level, usage, box,
+ data, stride, layer_stride);
}
-static void trace_context_render_condition(struct pipe_context *_context,
- struct pipe_query *query,
- boolean condition,
- uint mode)
+static void
+trace_context_render_condition(struct pipe_context *_context,
+ struct pipe_query *query,
+ boolean condition,
+ uint mode)
{
struct trace_context *tr_context = trace_context(_context);
struct pipe_context *context = tr_context->pipe;
@@ -1553,7 +1598,8 @@ static void trace_context_render_condition(struct pipe_context *_context,
}
-static void trace_context_texture_barrier(struct pipe_context *_context)
+static void
+trace_context_texture_barrier(struct pipe_context *_context)
{
struct trace_context *tr_context = trace_context(_context);
struct pipe_context *context = tr_context->pipe;
@@ -1568,8 +1614,9 @@ static void trace_context_texture_barrier(struct pipe_context *_context)
}
-static void trace_context_memory_barrier(struct pipe_context *_context,
- unsigned flags)
+static void
+trace_context_memory_barrier(struct pipe_context *_context,
+ unsigned flags)
{
struct trace_context *tr_context = trace_context(_context);
struct pipe_context *context = tr_context->pipe;
@@ -1583,9 +1630,10 @@ static void trace_context_memory_barrier(struct pipe_context *_context,
}
-static void trace_context_set_tess_state(struct pipe_context *_context,
- const float default_outer_level[4],
- const float default_inner_level[2])
+static void
+trace_context_set_tess_state(struct pipe_context *_context,
+ const float default_outer_level[4],
+ const float default_inner_level[2])
{
struct trace_context *tr_context = trace_context(_context);
struct pipe_context *context = tr_context->pipe;
@@ -1638,12 +1686,31 @@ static void trace_context_set_shader_buffers(struct pipe_context *_context,
FREE(_buffers);
}
+static void trace_context_launch_grid(struct pipe_context *_pipe,
+ const struct pipe_grid_info *info)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "launch_grid");
-static const struct debug_named_value rbug_blocker_flags[] = {
- {"before", 1, NULL},
- {"after", 2, NULL},
- DEBUG_NAMED_VALUE_END
-};
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(grid_info, info);
+
+ trace_dump_trace_flush();
+
+ if (info->indirect) {
+ struct pipe_grid_info _info;
+
+ memcpy(&_info, info, sizeof(_info));
+ _info.indirect = trace_resource_unwrap(tr_ctx, _info.indirect);
+ pipe->launch_grid(pipe, &_info);
+ } else {
+ pipe->launch_grid(pipe, info);
+ }
+
+ trace_dump_call_end();
+}
struct pipe_context *
trace_context_create(struct trace_screen *tr_scr,
@@ -1654,7 +1721,7 @@ trace_context_create(struct trace_screen *tr_scr,
if (!pipe)
goto error1;
- if(!trace_enabled())
+ if (!trace_enabled())
goto error1;
tr_ctx = CALLOC_STRUCT(trace_context);
@@ -1703,6 +1770,9 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(create_tes_state);
TR_CTX_INIT(bind_tes_state);
TR_CTX_INIT(delete_tes_state);
+ TR_CTX_INIT(create_compute_state);
+ TR_CTX_INIT(bind_compute_state);
+ TR_CTX_INIT(delete_compute_state);
TR_CTX_INIT(create_vertex_elements_state);
TR_CTX_INIT(bind_vertex_elements_state);
TR_CTX_INIT(delete_vertex_elements_state);
@@ -1738,6 +1808,7 @@ trace_context_create(struct trace_screen *tr_scr,
TR_CTX_INIT(memory_barrier);
TR_CTX_INIT(set_tess_state);
TR_CTX_INIT(set_shader_buffers);
+ TR_CTX_INIT(launch_grid);
TR_CTX_INIT(transfer_map);
TR_CTX_INIT(transfer_unmap);
@@ -1756,7 +1827,7 @@ error1:
/**
- * Sanity checker: check that the given context really is a
+ * Sanity checker: check that the given context really is a
* trace context (and not the wrapped driver's context).
*/
void
@@ -1765,4 +1836,3 @@ trace_context_check(const struct pipe_context *pipe)
struct trace_context *tr_ctx = (struct trace_context *) pipe;
assert(tr_ctx->base.destroy == trace_context_destroy);
}
-
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index cfbf53cf767..0627e5ab5d7 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -305,6 +305,36 @@ void trace_dump_shader_state(const struct pipe_shader_state *state)
}
+void trace_dump_compute_state(const struct pipe_compute_state *state)
+{
+ if (!trace_dumping_enabled_locked())
+ return;
+
+ if (!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_compute_state");
+
+ trace_dump_member_begin("prog");
+ if (state->prog) {
+ static char str[64 * 1024];
+ tgsi_dump_str(state->prog, 0, str, sizeof(str));
+ trace_dump_string(str);
+ } else {
+ trace_dump_null();
+ }
+ trace_dump_member_end();
+
+ trace_dump_member(uint, state, req_local_mem);
+ trace_dump_member(uint, state, req_private_mem);
+ trace_dump_member(uint, state, req_input_mem);
+
+ trace_dump_struct_end();
+}
+
+
void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state)
{
unsigned i;
@@ -864,3 +894,33 @@ trace_dump_query_result(unsigned query_type,
break;
}
}
+
+void trace_dump_grid_info(const struct pipe_grid_info *state)
+{
+ if (!trace_dumping_enabled_locked())
+ return;
+
+ if (!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_grid_info");
+
+ trace_dump_member(uint, state, pc);
+ trace_dump_member(ptr, state, input);
+
+ trace_dump_member_begin("block");
+ trace_dump_array(uint, state->block, Elements(state->block));
+ trace_dump_member_end();
+
+ trace_dump_member_begin("grid");
+ trace_dump_array(uint, state->grid, Elements(state->grid));
+ trace_dump_member_end();
+
+ trace_dump_member(ptr, state, indirect);
+ trace_dump_member(uint, state, indirect_offset);
+
+ trace_dump_struct_end();
+}
+
diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h
index 4f4ade155bc..ee0720d8ac8 100644
--- a/src/gallium/drivers/trace/tr_dump_state.h
+++ b/src/gallium/drivers/trace/tr_dump_state.h
@@ -50,6 +50,8 @@ void trace_dump_token(const struct tgsi_token *token);
void trace_dump_shader_state(const struct pipe_shader_state *state);
+void trace_dump_compute_state(const struct pipe_compute_state *state);
+
void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state);
void trace_dump_blend_state(const struct pipe_blend_state *state);
@@ -87,4 +89,6 @@ void trace_dump_blit_info(const struct pipe_blit_info *);
void trace_dump_query_result(unsigned query_type,
const union pipe_query_result *result);
+void trace_dump_grid_info(const struct pipe_grid_info *state);
+
#endif /* TR_STATE_H */
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index ff057e2a4a4..0612109c800 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -173,6 +173,30 @@ trace_screen_get_paramf(struct pipe_screen *_screen,
}
+static int
+trace_screen_get_compute_param(struct pipe_screen *_screen,
+ enum pipe_compute_cap param, void *data)
+{
+ struct trace_screen *tr_scr = trace_screen(_screen);
+ struct pipe_screen *screen = tr_scr->screen;
+ int result;
+
+ trace_dump_call_begin("pipe_screen", "get_compute_param");
+
+ trace_dump_arg(ptr, screen);
+ trace_dump_arg(int, param);
+ trace_dump_arg(ptr, data);
+
+ result = screen->get_compute_param(screen, param, data);
+
+ trace_dump_ret(int, result);
+
+ trace_dump_call_end();
+
+ return result;
+}
+
+
static boolean
trace_screen_is_format_supported(struct pipe_screen *_screen,
enum pipe_format format,
@@ -472,6 +496,7 @@ trace_screen_create(struct pipe_screen *screen)
tr_scr->base.get_param = trace_screen_get_param;
tr_scr->base.get_shader_param = trace_screen_get_shader_param;
tr_scr->base.get_paramf = trace_screen_get_paramf;
+ tr_scr->base.get_compute_param = trace_screen_get_compute_param;
tr_scr->base.is_format_supported = trace_screen_is_format_supported;
assert(screen->context_create);
tr_scr->base.context_create = trace_screen_context_create;
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index efbb69b71a7..f9eb0e151c5 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -193,6 +193,7 @@ qir_is_raw_mov(struct qinst *inst)
return ((inst->op == QOP_MOV ||
inst->op == QOP_FMOV ||
inst->op == QOP_MMOV) &&
+ inst->cond == QPU_COND_ALWAYS &&
!inst->dst.pack &&
!inst->src[0].pack);
}
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index 2f280c54523..ee1e9aafbb9 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -575,7 +575,7 @@ void
qir_schedule_instructions(struct vc4_compile *c)
{
void *mem_ctx = ralloc_context(NULL);
- struct schedule_state state = { 0 };
+ struct schedule_state state = { { 0 } };
if (debug) {
fprintf(stderr, "Pre-schedule instructions\n");
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index b06702afea2..450b97fc014 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -312,7 +312,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
}
- bool handled_qinst_cond = true;
+ bool handled_qinst_cond = false;
switch (qinst->op) {
case QOP_RCP:
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index b19d31af6ac..a4b3efcfda3 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -357,9 +357,12 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return VC4_MAX_TEXTURE_SAMPLERS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
default:
fprintf(stderr, "unknown shader param %d\n", param);