summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h5
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h5
-rw-r--r--src/gallium/drivers/freedreno/a4xx/a4xx.xml.h65
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c26
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h5
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h5
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c2
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c6
-rw-r--r--src/gallium/drivers/i915/i915_screen.c1
-rw-r--r--src/gallium/drivers/ilo/ilo_screen.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c44
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c36
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.c19
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.h5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.h1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp18
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp6
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp108
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp9
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp57
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp25
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.c13
-rw-r--r--src/gallium/drivers/nouveau/nouveau_context.h5
-rw-r--r--src/gallium/drivers/nouveau/nouveau_fence.c78
-rw-r--r--src/gallium/drivers/nouveau/nouveau_fence.h5
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c21
-rw-r--r--src/gallium/drivers/nouveau/nouveau_vp3_video.c1
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_context.c1
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_screen.c5
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c1
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_formats.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c8
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.h3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_resource.h7
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c5
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_shader_state.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c92
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c8
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c9
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c7
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c4
-rw-r--r--src/gallium/drivers/r300/r300_screen.c1
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c14
-rw-r--r--src/gallium/drivers/r600/evergreen_hw_context.c10
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c107
-rw-r--r--src/gallium/drivers/r600/evergreend.h9
-rw-r--r--src/gallium/drivers/r600/r600_blit.c15
-rw-r--r--src/gallium/drivers/r600/r600_hw_context.c44
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c13
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h3
-rw-r--r--src/gallium/drivers/r600/r600_shader.c1
-rw-r--r--src/gallium/drivers/r600/r600_shader.h2
-rw-r--r--src/gallium/drivers/r600/r600_state.c91
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c63
-rw-r--r--src/gallium/drivers/r600/r600d.h8
-rw-r--r--src/gallium/drivers/radeon/r600_buffer_common.c32
-rw-r--r--src/gallium/drivers/radeon/r600_cs.h15
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c124
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h23
-rw-r--r--src/gallium/drivers/radeon/r600_query.c144
-rw-r--r--src/gallium/drivers/radeon/r600_streamout.c18
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c2
-rw-r--r--src/gallium/drivers/radeon/radeon_uvd.c6
-rw-r--r--src/gallium/drivers/radeon/radeon_video.c3
-rw-r--r--src/gallium/drivers/radeonsi/cik_sdma.c14
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c114
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c24
-rw-r--r--src/gallium/drivers/radeonsi/si_cp_dma.c231
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c42
-rw-r--r--src/gallium/drivers/radeonsi/si_dma.c14
-rw-r--r--src/gallium/drivers/radeonsi/si_hw_context.c40
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c11
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h17
-rw-r--r--src/gallium/drivers/radeonsi/si_pm4.c6
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c67
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h28
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c48
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c43
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c218
-rw-r--r--src/gallium/drivers/radeonsi/sid.h322
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c1
-rw-r--r--src/gallium/drivers/svga/svga_draw_arrays.c3
-rw-r--r--src/gallium/drivers/svga/svga_draw_elements.c3
-rw-r--r--src/gallium/drivers/svga/svga_screen.c1
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_vgpu10.c35
-rw-r--r--src/gallium/drivers/vc4/vc4_bufmgr.c40
-rw-r--r--src/gallium/drivers/vc4/vc4_cl_dump.c17
-rw-r--r--src/gallium/drivers/vc4/vc4_resource.c48
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_state.c21
-rw-r--r--src/gallium/drivers/virgl/virgl_screen.c1
113 files changed, 2022 insertions, 916 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index 2853787a340..ef235734755 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index 4bbcb33614c..b5e1ddadde0 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 819f5b14a17..9f970365464 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
@@ -489,8 +490,8 @@ static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_r
return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK;
}
-#define REG_A4XX_RB_BLEND_RED 0x000020f3
-#define A4XX_RB_BLEND_RED_UINT__MASK 0x00007fff
+#define REG_A4XX_RB_BLEND_RED 0x000020f0
+#define A4XX_RB_BLEND_RED_UINT__MASK 0x0000ffff
#define A4XX_RB_BLEND_RED_UINT__SHIFT 0
static inline uint32_t A4XX_RB_BLEND_RED_UINT(uint32_t val)
{
@@ -503,8 +504,16 @@ static inline uint32_t A4XX_RB_BLEND_RED_FLOAT(float val)
return ((util_float_to_half(val)) << A4XX_RB_BLEND_RED_FLOAT__SHIFT) & A4XX_RB_BLEND_RED_FLOAT__MASK;
}
-#define REG_A4XX_RB_BLEND_GREEN 0x000020f4
-#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x00007fff
+#define REG_A4XX_RB_BLEND_RED_F32 0x000020f1
+#define A4XX_RB_BLEND_RED_F32__MASK 0xffffffff
+#define A4XX_RB_BLEND_RED_F32__SHIFT 0
+static inline uint32_t A4XX_RB_BLEND_RED_F32(float val)
+{
+ return ((fui(val)) << A4XX_RB_BLEND_RED_F32__SHIFT) & A4XX_RB_BLEND_RED_F32__MASK;
+}
+
+#define REG_A4XX_RB_BLEND_GREEN 0x000020f2
+#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x0000ffff
#define A4XX_RB_BLEND_GREEN_UINT__SHIFT 0
static inline uint32_t A4XX_RB_BLEND_GREEN_UINT(uint32_t val)
{
@@ -517,8 +526,16 @@ static inline uint32_t A4XX_RB_BLEND_GREEN_FLOAT(float val)
return ((util_float_to_half(val)) << A4XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A4XX_RB_BLEND_GREEN_FLOAT__MASK;
}
-#define REG_A4XX_RB_BLEND_BLUE 0x000020f5
-#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x00007fff
+#define REG_A4XX_RB_BLEND_GREEN_F32 0x000020f3
+#define A4XX_RB_BLEND_GREEN_F32__MASK 0xffffffff
+#define A4XX_RB_BLEND_GREEN_F32__SHIFT 0
+static inline uint32_t A4XX_RB_BLEND_GREEN_F32(float val)
+{
+ return ((fui(val)) << A4XX_RB_BLEND_GREEN_F32__SHIFT) & A4XX_RB_BLEND_GREEN_F32__MASK;
+}
+
+#define REG_A4XX_RB_BLEND_BLUE 0x000020f4
+#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x0000ffff
#define A4XX_RB_BLEND_BLUE_UINT__SHIFT 0
static inline uint32_t A4XX_RB_BLEND_BLUE_UINT(uint32_t val)
{
@@ -531,8 +548,16 @@ static inline uint32_t A4XX_RB_BLEND_BLUE_FLOAT(float val)
return ((util_float_to_half(val)) << A4XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A4XX_RB_BLEND_BLUE_FLOAT__MASK;
}
+#define REG_A4XX_RB_BLEND_BLUE_F32 0x000020f5
+#define A4XX_RB_BLEND_BLUE_F32__MASK 0xffffffff
+#define A4XX_RB_BLEND_BLUE_F32__SHIFT 0
+static inline uint32_t A4XX_RB_BLEND_BLUE_F32(float val)
+{
+ return ((fui(val)) << A4XX_RB_BLEND_BLUE_F32__SHIFT) & A4XX_RB_BLEND_BLUE_F32__MASK;
+}
+
#define REG_A4XX_RB_BLEND_ALPHA 0x000020f6
-#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x00007fff
+#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x0000ffff
#define A4XX_RB_BLEND_ALPHA_UINT__SHIFT 0
static inline uint32_t A4XX_RB_BLEND_ALPHA_UINT(uint32_t val)
{
@@ -545,6 +570,14 @@ static inline uint32_t A4XX_RB_BLEND_ALPHA_FLOAT(float val)
return ((util_float_to_half(val)) << A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A4XX_RB_BLEND_ALPHA_FLOAT__MASK;
}
+#define REG_A4XX_RB_BLEND_ALPHA_F32 0x000020f7
+#define A4XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff
+#define A4XX_RB_BLEND_ALPHA_F32__SHIFT 0
+static inline uint32_t A4XX_RB_BLEND_ALPHA_F32(float val)
+{
+ return ((fui(val)) << A4XX_RB_BLEND_ALPHA_F32__SHIFT) & A4XX_RB_BLEND_ALPHA_F32__MASK;
+}
+
#define REG_A4XX_RB_ALPHA_CONTROL 0x000020f8
#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff
#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0
@@ -2645,20 +2678,6 @@ static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val)
#define REG_A4XX_UNKNOWN_20EF 0x000020ef
-#define REG_A4XX_UNKNOWN_20F0 0x000020f0
-
-#define REG_A4XX_UNKNOWN_20F1 0x000020f1
-
-#define REG_A4XX_UNKNOWN_20F2 0x000020f2
-
-#define REG_A4XX_UNKNOWN_20F7 0x000020f7
-#define A4XX_UNKNOWN_20F7__MASK 0xffffffff
-#define A4XX_UNKNOWN_20F7__SHIFT 0
-static inline uint32_t A4XX_UNKNOWN_20F7(float val)
-{
- return ((fui(val)) << A4XX_UNKNOWN_20F7__SHIFT) & A4XX_UNKNOWN_20F7__MASK;
-}
-
#define REG_A4XX_UNKNOWN_2152 0x00002152
#define REG_A4XX_UNKNOWN_2153 0x00002153
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index cf5dd7b0f17..26b58718cd8 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -613,15 +613,19 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & FD_DIRTY_BLEND_COLOR) {
struct pipe_blend_color *bcolor = &ctx->blend_color;
- OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
- OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) |
+ OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
+ OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) |
A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
- OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
+ OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) |
A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
- OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
+ OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) |
A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
- OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
+ OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) |
A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
+ OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
}
if (dirty & FD_DIRTY_VERTTEX) {
@@ -699,15 +703,6 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1);
- OUT_RING(ring, 0x00000000);
-
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1);
- OUT_RING(ring, 0x00000000);
-
OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) |
A4XX_RB_BLEND_RED_FLOAT(0.0));
@@ -718,9 +713,6 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) |
A4XX_RB_BLEND_ALPHA_FLOAT(1.0));
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1);
- OUT_RING(ring, 0x3f800000);
-
OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
OUT_RING(ring, 0x00000000);
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index 906368c0efa..ca3d2ac3fca 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index 490cf5beaf0..f095e3061b2 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 9f8c33263fb..56d1834ef9c 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -239,6 +239,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -549,6 +550,7 @@ fd_screen_create(struct fd_device *dev)
case 220:
fd2_screen_init(pscreen);
break;
+ case 305:
case 307:
case 320:
case 330:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 8c9234b3847..157dc73a3c6 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -2325,17 +2325,17 @@ emit_instructions(struct ir3_compile *ctx)
}
/* Setup inputs: */
- foreach_list_typed(nir_variable, var, node, &ctx->s->inputs) {
+ nir_foreach_variable(var, &ctx->s->inputs) {
setup_input(ctx, var);
}
/* Setup outputs: */
- foreach_list_typed(nir_variable, var, node, &ctx->s->outputs) {
+ nir_foreach_variable(var, &ctx->s->outputs) {
setup_output(ctx, var);
}
/* Setup variables (which should only be arrays): */
- foreach_list_typed(nir_variable, var, node, &ctx->s->globals) {
+ nir_foreach_variable(var, &ctx->s->globals) {
declare_var(ctx, var);
}
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 2d2fd375656..a5b161882cd 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -253,6 +253,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 888f7aa6782..cfa2fb41152 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -475,6 +475,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index df262fa4716..ceac86abe1d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -746,7 +746,12 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
pos_init(bld, x0, y0);
- if (coeff_type.length > 4) {
+ /*
+ * Simple method (single step interpolation) may be slower if vector length
+ * is just 4, but the results are different (generally less accurate) with
+ * the other method, so always use more accurate version.
+ */
+ if (1) {
bld->simple_interp = TRUE;
{
/* XXX this should use a global static table */
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 9acde4f1b06..b915c1d64ff 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -36,6 +36,7 @@
#include "util/u_memory.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_format.h"
#include "lp_context.h"
#include "lp_jit.h"
@@ -208,6 +209,8 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT];
LLVMTypeRef thread_data_type;
+ elem_types[LP_JIT_THREAD_DATA_CACHE] =
+ LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
LLVMInt32TypeInContext(lc);
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 097fa7dce7c..9db26f2cba9 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -43,6 +43,7 @@
#include "lp_texture.h"
+struct lp_build_format_cache;
struct lp_fragment_shader_variant;
struct llvmpipe_screen;
@@ -189,6 +190,7 @@ enum {
struct lp_jit_thread_data
{
+ struct lp_build_format_cache *cache;
uint64_t vis_counter;
/*
@@ -201,12 +203,16 @@ struct lp_jit_thread_data
enum {
- LP_JIT_THREAD_DATA_COUNTER = 0,
+ LP_JIT_THREAD_DATA_CACHE = 0,
+ LP_JIT_THREAD_DATA_COUNTER,
LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX,
LP_JIT_THREAD_DATA_COUNT
};
+#define lp_jit_thread_data_cache(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_THREAD_DATA_CACHE, "cache")
+
#define lp_jit_thread_data_counter(_gallivm, _ptr) \
lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter")
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index c726707c062..d22e50777fa 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -43,6 +43,7 @@
#include "lp_query.h"
#include "lp_rast.h"
#include "lp_rast_priv.h"
+#include "gallivm/lp_bld_format.h"
#include "gallivm/lp_bld_debug.h"
#include "lp_scene.h"
#include "lp_tex_sample.h"
@@ -664,6 +665,17 @@ rasterize_scene(struct lp_rasterizer_task *task,
{
task->scene = scene;
+ /* Clear the cache tags. This should not always be necessary but
+ simpler for now. */
+#if LP_USE_TEXTURE_CACHE
+ memset(task->thread_data.cache->cache_tags, 0,
+ sizeof(task->thread_data.cache->cache_tags));
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+ task->thread_data.cache->cache_access_total = 0;
+ task->thread_data.cache->cache_access_miss = 0;
+#endif
+#endif
+
if (!task->rast->no_rast && !scene->discard) {
/* loop over scene bins, rasterize each */
{
@@ -679,6 +691,20 @@ rasterize_scene(struct lp_rasterizer_task *task,
}
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+ {
+ uint64_t total, miss;
+ total = task->thread_data.cache->cache_access_total;
+ miss = task->thread_data.cache->cache_access_miss;
+ if (total) {
+ debug_printf("thread %d cache access %llu miss %llu hit rate %f\n",
+ task->thread_index, (long long unsigned)total,
+ (long long unsigned)miss,
+ (float)(total - miss)/(float)total);
+ }
+ }
+#endif
+
if (scene->fence) {
lp_fence_signal(scene->fence);
}
@@ -866,10 +892,15 @@ lp_rast_create( unsigned num_threads )
goto no_full_scenes;
}
- for (i = 0; i < Elements(rast->tasks); i++) {
+ for (i = 0; i < MAX2(1, num_threads); i++) {
struct lp_rasterizer_task *task = &rast->tasks[i];
task->rast = rast;
task->thread_index = i;
+ task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache),
+ 16);
+ if (!task->thread_data.cache) {
+ goto no_thread_data_cache;
+ }
}
rast->num_threads = num_threads;
@@ -885,6 +916,14 @@ lp_rast_create( unsigned num_threads )
return rast;
+no_thread_data_cache:
+ for (i = 0; i < MAX2(1, rast->num_threads); i++) {
+ if (rast->tasks[i].thread_data.cache) {
+ align_free(rast->tasks[i].thread_data.cache);
+ }
+ }
+
+ lp_scene_queue_destroy(rast->full_scenes);
no_full_scenes:
FREE(rast);
no_rast:
@@ -923,6 +962,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
pipe_semaphore_destroy(&rast->tasks[i].work_ready);
pipe_semaphore_destroy(&rast->tasks[i].work_done);
}
+ for (i = 0; i < MAX2(1, rast->num_threads); i++) {
+ align_free(rast->tasks[i].thread_data.cache);
+ }
/* for synchronizing rasterization threads */
pipe_barrier_destroy( &rast->barrier );
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index d1c50aefc84..9f5e7378ac7 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -300,6 +300,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index fd6c49aacd8..f55f6b4fa4f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -421,7 +421,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
lp_build_tgsi_soa(gallivm, tokens, type, &mask,
consts_ptr, num_consts_ptr, &system_values,
interp->inputs,
- outputs, context_ptr,
+ outputs, context_ptr, thread_data_ptr,
sampler, &shader->info.base, NULL);
/* Alpha test */
@@ -2303,8 +2303,8 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(dady_ptr, "dady");
lp_build_name(color_ptr_ptr, "color_ptr_ptr");
lp_build_name(depth_ptr, "depth");
- lp_build_name(thread_data_ptr, "thread_data");
lp_build_name(mask_input, "mask_input");
+ lp_build_name(thread_data_ptr, "thread_data");
lp_build_name(stride_ptr, "stride_ptr");
lp_build_name(depth_stride, "depth_stride");
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index d9abd1ae37c..0640a217874 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -44,6 +44,9 @@
#include "lp_test.h"
+#define USE_TEXTURE_CACHE 1
+
+static struct lp_build_format_cache *cache_ptr;
void
write_tsv_header(FILE *fp)
@@ -71,7 +74,7 @@ write_tsv_row(FILE *fp,
typedef void
(*fetch_ptr_t)(void *unpacked, const void *packed,
- unsigned i, unsigned j);
+ unsigned i, unsigned j, struct lp_build_format_cache *cache);
static LLVMValueRef
@@ -83,7 +86,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
LLVMContextRef context = gallivm->context;
LLVMModuleRef module = gallivm->module;
LLVMBuilderRef builder = gallivm->builder;
- LLVMTypeRef args[4];
+ LLVMTypeRef args[5];
LLVMValueRef func;
LLVMValueRef packed_ptr;
LLVMValueRef offset = LLVMConstNull(LLVMInt32TypeInContext(context));
@@ -92,6 +95,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
LLVMValueRef j;
LLVMBasicBlockRef block;
LLVMValueRef rgba;
+ LLVMValueRef cache = NULL;
util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name,
type.floating ? "float" : "unorm8");
@@ -99,6 +103,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
args[0] = LLVMPointerType(lp_build_vec_type(gallivm, type), 0);
args[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0);
args[3] = args[2] = LLVMInt32TypeInContext(context);
+ args[4] = LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
func = LLVMAddFunction(module, name,
LLVMFunctionType(LLVMVoidTypeInContext(context),
@@ -109,11 +114,15 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
i = LLVMGetParam(func, 2);
j = LLVMGetParam(func, 3);
+ if (cache_ptr) {
+ cache = LLVMGetParam(func, 4);
+ }
+
block = LLVMAppendBasicBlockInContext(context, func, "entry");
LLVMPositionBuilderAtEnd(builder, block);
rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE,
- packed_ptr, offset, i, j);
+ packed_ptr, offset, i, j, cache);
LLVMBuildStore(builder, rgba, rgba_ptr);
@@ -170,7 +179,7 @@ test_format_float(unsigned verbose, FILE *fp,
memset(unpacked, 0, sizeof unpacked);
- fetch_ptr(unpacked, packed, j, i);
+ fetch_ptr(unpacked, packed, j, i, cache_ptr);
for(k = 0; k < 4; ++k) {
if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) {
@@ -187,6 +196,11 @@ test_format_float(unsigned verbose, FILE *fp,
}
}
+ /* Ignore errors in S3TC for now */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ match = TRUE;
+ }
+
if (!match) {
printf("FAILED\n");
printf(" Packed: %02x %02x %02x %02x\n",
@@ -261,7 +275,7 @@ test_format_unorm8(unsigned verbose, FILE *fp,
memset(unpacked, 0, sizeof unpacked);
- fetch_ptr(unpacked, packed, j, i);
+ fetch_ptr(unpacked, packed, j, i, cache_ptr);
match = TRUE;
for(k = 0; k < 4; ++k) {
@@ -277,6 +291,11 @@ test_format_unorm8(unsigned verbose, FILE *fp,
match = FALSE;
}
+ /* Ignore errors in S3TC as we only implement a poor man approach */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ match = TRUE;
+ }
+
if (!match) {
printf("FAILED\n");
printf(" Packed: %02x %02x %02x %02x\n",
@@ -334,6 +353,10 @@ test_all(unsigned verbose, FILE *fp)
util_format_s3tc_init();
+#if USE_TEXTURE_CACHE
+ cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16);
+#endif
+
for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
const struct util_format_description *format_desc;
@@ -363,6 +386,9 @@ test_all(unsigned verbose, FILE *fp)
success = FALSE;
}
}
+#if USE_TEXTURE_CACHE
+ align_free(cache_ptr);
+#endif
return success;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
index 316d1c55082..217abe963b7 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
@@ -221,6 +221,21 @@ LP_LLVM_SAMPLER_MEMBER(lod_bias, LP_JIT_SAMPLER_LOD_BIAS, TRUE)
LP_LLVM_SAMPLER_MEMBER(border_color, LP_JIT_SAMPLER_BORDER_COLOR, FALSE)
+#if LP_USE_TEXTURE_CACHE
+static LLVMValueRef
+lp_llvm_texture_cache_ptr(const struct lp_sampler_dynamic_state *base,
+ struct gallivm_state *gallivm,
+ LLVMValueRef thread_data_ptr,
+ unsigned unit)
+{
+ /* We use the same cache for all units */
+ (void)unit;
+
+ return lp_jit_thread_data_cache(gallivm, thread_data_ptr);
+}
+#endif
+
+
static void
lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
{
@@ -314,6 +329,10 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state)
sampler->dynamic_state.base.lod_bias = lp_llvm_sampler_lod_bias;
sampler->dynamic_state.base.border_color = lp_llvm_sampler_border_color;
+#if LP_USE_TEXTURE_CACHE
+ sampler->dynamic_state.base.cache_ptr = lp_llvm_texture_cache_ptr;
+#endif
+
sampler->dynamic_state.static_state = static_state;
return &sampler->base;
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index f4aff226ce1..e26d608c9eb 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -34,6 +34,10 @@
struct lp_sampler_static_state;
+/**
+ * Whether texture cache is used for s3tc textures.
+ */
+#define LP_USE_TEXTURE_CACHE 0
/**
* Pure-LLVM texture sampling code generator.
@@ -42,5 +46,4 @@ struct lp_sampler_static_state;
struct lp_build_sampler_soa *
lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key);
-
#endif /* LP_TEX_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 7862ac8f217..82868814581 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -805,7 +805,7 @@ llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen)
#endif
screen->resource_create = llvmpipe_resource_create;
- screen->resource_create_front = llvmpipe_resource_create_front;
+/* screen->resource_create_front = llvmpipe_resource_create_front; */
screen->resource_destroy = llvmpipe_resource_destroy;
screen->resource_from_handle = llvmpipe_resource_from_handle;
screen->resource_get_handle = llvmpipe_resource_get_handle;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index f6e93081e76..d09a0ab0610 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -389,6 +389,7 @@ enum SVSemantic
SV_SBASE,
SV_VERTEX_STRIDE,
SV_INVOCATION_INFO,
+ SV_THREAD_KILL,
SV_UNDEFINED,
SV_LAST
};
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
index 19418c0e0f1..dca799dd9b5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
@@ -392,6 +392,12 @@ BuildUtil::mkImm(float f)
return mkImm(u.u32);
}
+ImmediateValue *
+BuildUtil::mkImm(double d)
+{
+ return new_ImmediateValue(prog, d);
+}
+
Value *
BuildUtil::loadImm(Value *dst, float f)
{
@@ -399,6 +405,12 @@ BuildUtil::loadImm(Value *dst, float f)
}
Value *
+BuildUtil::loadImm(Value *dst, double d)
+{
+ return mkOp1v(OP_MOV, TYPE_F64, dst ? dst : getScratch(), mkImm(d));
+}
+
+Value *
BuildUtil::loadImm(Value *dst, uint32_t u)
{
return mkOp1v(OP_MOV, TYPE_U32, dst ? dst : getScratch(), mkImm(u));
@@ -555,6 +567,12 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
switch (i->dType) {
case TYPE_U64: hTy = TYPE_U32; break;
case TYPE_S64: hTy = TYPE_S32; break;
+ case TYPE_F64:
+ if (i->op == OP_MOV) {
+ hTy = TYPE_U32;
+ break;
+ }
+ /* fallthrough */
default:
return NULL;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
index 0d544581697..8f3bf77949c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
@@ -90,12 +90,14 @@ public:
void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2);
ImmediateValue *mkImm(float);
+ ImmediateValue *mkImm(double);
ImmediateValue *mkImm(uint32_t);
ImmediateValue *mkImm(uint64_t);
ImmediateValue *mkImm(int i) { return mkImm((uint32_t)i); }
Value *loadImm(Value *dst, float);
+ Value *loadImm(Value *dst, double);
Value *loadImm(Value *dst, uint32_t);
Value *loadImm(Value *dst, uint64_t);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index c0cab3299b5..b49bf9d53bc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -96,6 +96,7 @@ struct nv50_ir_prog_info
uint32_t tlsSpace; /* required local memory per thread */
uint32_t *code;
uint32_t codeSize;
+ uint32_t instructions;
uint8_t sourceRep; /* NV50_PROGRAM_IR */
const void *source;
void *relocData;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index d712c9c300a..b163cd2db4a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1644,6 +1644,7 @@ CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
case SV_VERTEX_COUNT: return 0x10;
case SV_INVOCATION_ID: return 0x11;
case SV_YDIR: return 0x12;
+ case SV_THREAD_KILL: return 0x13;
case SV_TID: return 0x21 + SDATA(ref).sv.index;
case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
case SV_NTID: return 0x29 + SDATA(ref).sv.index;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index a327d572470..e9ddd366391 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -244,6 +244,7 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val)
case SV_LANEID : id = 0x00; break;
case SV_VERTEX_COUNT : id = 0x10; break;
case SV_INVOCATION_ID : id = 0x11; break;
+ case SV_THREAD_KILL : id = 0x13; break;
case SV_INVOCATION_INFO: id = 0x1d; break;
default:
assert(!"invalid system value");
@@ -310,9 +311,12 @@ CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
uint32_t val = imm->reg.data.u32;
if (len == 19) {
- if (isFloatType(insn->sType)) {
+ if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
assert(!(val & 0x00000fff));
val >>= 12;
+ } else if (insn->sType == TYPE_F64) {
+ assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
+ val = imm->reg.data.u64 >> 44;
}
assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
emitField( 56, 1, (val & 0x80000) >> 19);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 9f1e4b803d5..0b5288218d1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -96,9 +96,12 @@ private:
void emitUADD(const Instruction *);
void emitAADD(const Instruction *);
void emitFADD(const Instruction *);
+ void emitDADD(const Instruction *);
void emitIMUL(const Instruction *);
void emitFMUL(const Instruction *);
+ void emitDMUL(const Instruction *);
void emitFMAD(const Instruction *);
+ void emitDMAD(const Instruction *);
void emitIMAD(const Instruction *);
void emitISAD(const Instruction *);
@@ -438,9 +441,9 @@ CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
return;
if ((mode & 3) == 1) {
- const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
+ const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
- switch (i->getSrc(0)->reg.type) {
+ switch (i->sType) {
case TYPE_U8:
break;
case TYPE_U16:
@@ -954,11 +957,13 @@ CodeEmitterNV50::emitMINMAX(const Instruction *i)
assert(0);
break;
}
- code[1] |= i->src(0).mod.abs() << 20;
- code[1] |= i->src(0).mod.neg() << 26;
- code[1] |= i->src(1).mod.abs() << 19;
- code[1] |= i->src(1).mod.neg() << 27;
}
+
+ code[1] |= i->src(0).mod.abs() << 20;
+ code[1] |= i->src(0).mod.neg() << 26;
+ code[1] |= i->src(1).mod.abs() << 19;
+ code[1] |= i->src(1).mod.neg() << 27;
+
emitForm_MAD(i);
}
@@ -994,6 +999,26 @@ CodeEmitterNV50::emitFMAD(const Instruction *i)
}
void
+CodeEmitterNV50::emitDMAD(const Instruction *i)
+{
+ const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
+ const int neg_add = i->src(2).mod.neg();
+
+ assert(i->encSize == 8);
+ assert(!i->saturate);
+
+ code[1] = 0x40000000;
+ code[0] = 0xe0000000;
+
+ code[1] |= neg_mul << 26;
+ code[1] |= neg_add << 27;
+
+ roundMode_MAD(i);
+
+ emitForm_MAD(i);
+}
+
+void
CodeEmitterNV50::emitFADD(const Instruction *i)
{
const int neg0 = i->src(0).mod.neg();
@@ -1028,6 +1053,25 @@ CodeEmitterNV50::emitFADD(const Instruction *i)
}
void
+CodeEmitterNV50::emitDADD(const Instruction *i)
+{
+ const int neg0 = i->src(0).mod.neg();
+ const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
+
+ assert(!(i->src(0).mod | i->src(1).mod).abs());
+ assert(!i->saturate);
+ assert(i->encSize == 8);
+
+ code[1] = 0x60000000;
+ code[0] = 0xe0000000;
+
+ emitForm_ADD(i);
+
+ code[1] |= neg0 << 26;
+ code[1] |= neg1 << 27;
+}
+
+void
CodeEmitterNV50::emitUADD(const Instruction *i)
{
const int neg0 = i->src(0).mod.neg();
@@ -1081,7 +1125,10 @@ CodeEmitterNV50::emitIMUL(const Instruction *i)
if (i->encSize == 8) {
code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
- emitForm_MAD(i);
+ if (i->src(1).getFile() == FILE_IMMEDIATE)
+ emitForm_IMM(i);
+ else
+ emitForm_MAD(i);
} else {
if (i->sType == TYPE_S16)
code[0] |= 0x8100;
@@ -1121,6 +1168,25 @@ CodeEmitterNV50::emitFMUL(const Instruction *i)
}
void
+CodeEmitterNV50::emitDMUL(const Instruction *i)
+{
+ const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
+
+ assert(!i->saturate);
+ assert(i->encSize == 8);
+
+ code[1] = 0x80000000;
+ code[0] = 0xe0000000;
+
+ if (neg)
+ code[1] |= 0x08000000;
+
+ roundMode_CVT(i->rnd);
+
+ emitForm_MAD(i);
+}
+
+void
CodeEmitterNV50::emitIMAD(const Instruction *i)
{
code[0] = 0x60000000;
@@ -1136,7 +1202,10 @@ CodeEmitterNV50::emitIMAD(const Instruction *i)
code[1] |= neg1 << 27;
code[1] |= neg2 << 26;
- emitForm_MAD(i);
+ if (i->src(1).getFile() == FILE_IMMEDIATE)
+ emitForm_IMM(i);
+ else
+ emitForm_MAD(i);
if (i->flagsSrc >= 0) {
// add with carry from $cX
@@ -1181,9 +1250,11 @@ CodeEmitterNV50::emitSET(const Instruction *i)
code[0] = 0x30000000;
code[1] = 0x60000000;
- emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
-
switch (i->sType) {
+ case TYPE_F64:
+ code[0] = 0xe0000000;
+ code[1] = 0xe0000000;
+ break;
case TYPE_F32: code[0] |= 0x80000000; break;
case TYPE_S32: code[1] |= 0x0c000000; break;
case TYPE_U32: code[1] |= 0x04000000; break;
@@ -1193,6 +1264,9 @@ CodeEmitterNV50::emitSET(const Instruction *i)
assert(0);
break;
}
+
+ emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
+
if (i->src(0).mod.neg()) code[1] |= 0x04000000;
if (i->src(1).mod.neg()) code[1] |= 0x08000000;
if (i->src(0).mod.abs()) code[1] |= 0x00100000;
@@ -1756,7 +1830,9 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
break;
case OP_ADD:
case OP_SUB:
- if (isFloatType(insn->dType))
+ if (insn->dType == TYPE_F64)
+ emitDADD(insn);
+ else if (isFloatType(insn->dType))
emitFADD(insn);
else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
emitAADD(insn);
@@ -1764,14 +1840,18 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
emitUADD(insn);
break;
case OP_MUL:
- if (isFloatType(insn->dType))
+ if (insn->dType == TYPE_F64)
+ emitDMUL(insn);
+ else if (isFloatType(insn->dType))
emitFMUL(insn);
else
emitIMUL(insn);
break;
case OP_MAD:
case OP_FMA:
- if (isFloatType(insn->dType))
+ if (insn->dType == TYPE_F64)
+ emitDMAD(insn);
+ else if (isFloatType(insn->dType))
emitFMAD(insn);
else
emitIMAD(insn);
@@ -1943,7 +2023,7 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
{
const Target::OpInfo &info = targ->getOpInfo(i);
- if (info.minEncSize > 4)
+ if (info.minEncSize > 4 || i->dType == TYPE_F64)
return 8;
// check constraints on dst and src operands
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index fd103146c72..2a13e1086a0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -323,6 +323,14 @@ CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
assert(imm);
u32 = imm->reg.data.u32;
+ if ((code[0] & 0xf) == 0x1) {
+ // double immediate
+ uint64_t u64 = imm->reg.data.u64;
+ assert(!(u64 & 0x00000fffffffffffULL));
+ assert(!(code[1] & 0xc000));
+ code[0] |= ((u64 >> 44) & 0x3f) << 26;
+ code[1] |= 0xc000 | (u64 >> 50);
+ } else
if ((code[0] & 0xf) == 0x2) {
// LIMM
code[0] |= (u32 & 0x3f) << 26;
@@ -1831,6 +1839,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
case SV_VERTEX_COUNT: return 0x10;
case SV_INVOCATION_ID: return 0x11;
case SV_YDIR: return 0x12;
+ case SV_THREAD_KILL: return 0x13;
case SV_TID: return 0x21 + SDATA(ref).sv.index;
case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
case SV_NTID: return 0x29 + SDATA(ref).sv.index;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 6a7cb4224f4..08a73d79781 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -376,6 +376,7 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;
case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
+ case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
default:
assert(0);
return nv50_ir::SV_CLOCK;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index eec502be798..75164ef0641 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -75,7 +75,7 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
s[0] = mul->getSrc(0);
s[1] = mul->getSrc(1);
- if (isSignedType(mul->sType)) {
+ if (isSignedType(mul->sType) && highResult) {
s[0] = bld->getSSA(fullSize);
s[1] = bld->getSSA(fullSize);
bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 44f74c61304..0f1dcf0dacd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -155,7 +155,7 @@ private:
void checkSwapSrc01(Instruction *);
bool isCSpaceLoad(Instruction *);
- bool isImmd32Load(Instruction *);
+ bool isImmdLoad(Instruction *);
bool isAttribOrSharedLoad(Instruction *);
};
@@ -166,9 +166,10 @@ LoadPropagation::isCSpaceLoad(Instruction *ld)
}
bool
-LoadPropagation::isImmd32Load(Instruction *ld)
+LoadPropagation::isImmdLoad(Instruction *ld)
{
- if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4))
+ if (!ld || (ld->op != OP_MOV) ||
+ ((typeSizeof(ld->dType) != 4) && (typeSizeof(ld->dType) != 8)))
return false;
return ld->src(0).getFile() == FILE_IMMEDIATE;
}
@@ -201,8 +202,8 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
else
return;
} else
- if (isImmd32Load(i0)) {
- if (!isCSpaceLoad(i1) && !isImmd32Load(i1))
+ if (isImmdLoad(i0)) {
+ if (!isCSpaceLoad(i1) && !isImmdLoad(i1))
insn->swapSources(0, 1);
else
return;
@@ -447,6 +448,7 @@ ConstantFolding::expr(Instruction *i,
{
struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
struct Storage res;
+ DataType type = i->dType;
memset(&res.data, 0, sizeof(res.data));
@@ -588,6 +590,18 @@ ConstantFolding::expr(Instruction *i,
// The two arguments to pfetch are logically added together. Normally
// the second argument will not be constant, but that can happen.
res.data.u32 = a->data.u32 + b->data.u32;
+ type = TYPE_U32;
+ break;
+ case OP_MERGE:
+ switch (i->dType) {
+ case TYPE_U64:
+ case TYPE_S64:
+ case TYPE_F64:
+ res.data.u64 = (((uint64_t)b->data.u32) << 32) | a->data.u32;
+ break;
+ default:
+ return;
+ }
break;
default:
return;
@@ -602,6 +616,8 @@ ConstantFolding::expr(Instruction *i,
i->setSrc(1, NULL);
i->getSrc(0)->reg.data = res.data;
+ i->getSrc(0)->reg.type = type;
+ i->getSrc(0)->reg.size = typeSizeof(type);
switch (i->op) {
case OP_MAD:
@@ -1148,6 +1164,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
#define CASE(type, dst, fmin, fmax, imin, imax, umin, umax) \
case type: \
switch (i->sType) { \
+ case TYPE_F64: \
+ res.data.dst = util_iround(i->saturate ? \
+ CLAMP(imm0.reg.data.f64, fmin, fmax) : \
+ imm0.reg.data.f64); \
+ break; \
case TYPE_F32: \
res.data.dst = util_iround(i->saturate ? \
CLAMP(imm0.reg.data.f32, fmin, fmax) : \
@@ -1185,6 +1206,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CASE(TYPE_S32, s32, INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX, 0, INT32_MAX);
case TYPE_F32:
switch (i->sType) {
+ case TYPE_F64:
+ res.data.f32 = i->saturate ?
+ CLAMP(imm0.reg.data.f64, 0.0f, 1.0f) :
+ imm0.reg.data.f64;
+ break;
case TYPE_F32:
res.data.f32 = i->saturate ?
CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) :
@@ -1199,6 +1225,27 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
}
i->setSrc(0, bld.mkImm(res.data.f32));
break;
+ case TYPE_F64:
+ switch (i->sType) {
+ case TYPE_F64:
+ res.data.f64 = i->saturate ?
+ CLAMP(imm0.reg.data.f64, 0.0f, 1.0f) :
+ imm0.reg.data.f64;
+ break;
+ case TYPE_F32:
+ res.data.f64 = i->saturate ?
+ CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) :
+ imm0.reg.data.f32;
+ break;
+ case TYPE_U16: res.data.f64 = (double) imm0.reg.data.u16; break;
+ case TYPE_U32: res.data.f64 = (double) imm0.reg.data.u32; break;
+ case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break;
+ case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break;
+ default:
+ return;
+ }
+ i->setSrc(0, bld.mkImm(res.data.f64));
+ break;
default:
return;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 5f30f3d354b..0b02599dbdd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -275,6 +275,7 @@ static const char *SemanticStr[SV_LAST + 1] =
"SBASE",
"VERTEX_STRIDE",
"INVOCATION_INFO",
+ "THREAD_KILL",
"?",
"(INVALID)"
};
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index afc8ff1374f..4390a726d1c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -373,6 +373,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
if (!code)
return false;
emit->setCodeLocation(code, binSize);
+ info->bin.instructions = 0;
for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
Function *fn = reinterpret_cast<Function *>(fi.get());
@@ -382,6 +383,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
for (int b = 0; b < fn->bbCount; ++b) {
for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
emit->emitInstruction(i);
+ info->bin.instructions++;
if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
info->io.fp64 = true;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index f3ddcaa5199..94cf0f0e05e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -343,7 +343,7 @@ TargetNV50::insnCanLoad(const Instruction *i, int s,
}
if (sf == FILE_IMMEDIATE)
- return true;
+ return ldSize <= 4;
// Check if memory access is encodable:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 27df0eba66b..8f59d86a72f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -338,17 +338,30 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
if (sf == FILE_IMMEDIATE) {
Storage &reg = ld->getSrc(0)->asImm()->reg;
- if (typeSizeof(i->sType) > 4)
- return false;
- if (opInfo[i->op].immdBits != 0xffffffff) {
- if (i->sType == TYPE_F32) {
+ if (opInfo[i->op].immdBits != 0xffffffff || typeSizeof(i->sType) > 4) {
+ switch (i->sType) {
+ case TYPE_F64:
+ if (reg.data.u64 & 0x00000fffffffffffULL)
+ return false;
+ break;
+ case TYPE_F32:
if (reg.data.u32 & 0xfff)
return false;
- } else
- if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
+ break;
+ case TYPE_S32:
+ case TYPE_U32:
// with u32, 0xfffff counts as 0xffffffff as well
if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
return false;
+ break;
+ case TYPE_U8:
+ case TYPE_S8:
+ case TYPE_U16:
+ case TYPE_S16:
+ case TYPE_F16:
+ break;
+ default:
+ return false;
}
} else
if (i->op == OP_MAD || i->op == OP_FMA) {
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 72e070b5f06..68e69beb08f 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -225,21 +225,22 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
* for write/read by waiting on the buffer's relevant fences.
*/
static inline bool
-nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw)
+nouveau_buffer_sync(struct nouveau_context *nv,
+ struct nv04_resource *buf, unsigned rw)
{
if (rw == PIPE_TRANSFER_READ) {
if (!buf->fence_wr)
return true;
NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
!nouveau_fence_signalled(buf->fence_wr));
- if (!nouveau_fence_wait(buf->fence_wr))
+ if (!nouveau_fence_wait(buf->fence_wr, &nv->debug))
return false;
} else {
if (!buf->fence)
return true;
NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
!nouveau_fence_signalled(buf->fence));
- if (!nouveau_fence_wait(buf->fence))
+ if (!nouveau_fence_wait(buf->fence, &nv->debug))
return false;
nouveau_fence_ref(NULL, &buf->fence);
@@ -478,7 +479,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
/* Discarding was not possible, must sync because
* subsequent transfers might use UNSYNCHRONIZED. */
- nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
+ nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
} else
if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
/* The whole range is being discarded, so it doesn't matter what was
@@ -490,7 +491,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
if (usage & PIPE_TRANSFER_DONTBLOCK)
map = NULL;
else
- nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
+ nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
} else {
/* It is expected that the returned buffer be a representation of the
* data in question, so we must copy it over from the buffer. */
@@ -615,7 +616,7 @@ nouveau_resource_map_offset(struct nouveau_context *nv,
if (res->mm) {
unsigned rw;
rw = (flags & NOUVEAU_BO_WR) ? PIPE_TRANSFER_WRITE : PIPE_TRANSFER_READ;
- nouveau_buffer_sync(res, rw);
+ nouveau_buffer_sync(nv, res, rw);
if (nouveau_bo_map(res->bo, 0, NULL))
return NULL;
} else {
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
index decb2714ede..c3bbb11bd60 100644
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -2,6 +2,7 @@
#define __NOUVEAU_CONTEXT_H__
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
#include <nouveau.h>
#define NOUVEAU_MAX_SCRATCH_BUFS 4
@@ -14,6 +15,7 @@ struct nouveau_context {
struct nouveau_client *client;
struct nouveau_pushbuf *pushbuf;
+ struct pipe_debug_callback debug;
bool vbo_dirty;
@@ -64,6 +66,9 @@ void
nouveau_context_init_vdec(struct nouveau_context *);
void
+nouveau_context_init(struct nouveau_context *);
+
+void
nouveau_scratch_runout_release(struct nouveau_context *);
/* This is needed because we don't hold references outside of context::scratch,
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c
index 21cf2b9ae5e..691553ae7e4 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -23,6 +23,7 @@
#include "nouveau_screen.h"
#include "nouveau_winsys.h"
#include "nouveau_fence.h"
+#include "os/os_time.h"
#ifdef PIPE_OS_UNIX
#include <sched.h>
@@ -58,26 +59,6 @@ nouveau_fence_trigger_work(struct nouveau_fence *fence)
}
}
-bool
-nouveau_fence_work(struct nouveau_fence *fence,
- void (*func)(void *), void *data)
-{
- struct nouveau_fence_work *work;
-
- if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
- func(data);
- return true;
- }
-
- work = CALLOC_STRUCT(nouveau_fence_work);
- if (!work)
- return false;
- work->func = func;
- work->data = data;
- LIST_ADD(&work->list, &fence->work);
- return true;
-}
-
void
nouveau_fence_emit(struct nouveau_fence *fence)
{
@@ -181,11 +162,10 @@ nouveau_fence_signalled(struct nouveau_fence *fence)
return fence->state == NOUVEAU_FENCE_STATE_SIGNALLED;
}
-bool
-nouveau_fence_wait(struct nouveau_fence *fence)
+static bool
+nouveau_fence_kick(struct nouveau_fence *fence)
{
struct nouveau_screen *screen = fence->screen;
- uint32_t spins = 0;
/* wtf, someone is waiting on a fence in flush_notify handler? */
assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);
@@ -206,11 +186,32 @@ nouveau_fence_wait(struct nouveau_fence *fence)
if (fence == screen->fence.current)
nouveau_fence_next(screen);
- do {
- nouveau_fence_update(screen, false);
+ nouveau_fence_update(screen, false);
+
+ return true;
+}
- if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
+bool
+nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debug)
+{
+ struct nouveau_screen *screen = fence->screen;
+ uint32_t spins = 0;
+ int64_t start = 0;
+
+ if (debug && debug->debug_message)
+ start = os_time_get_nano();
+
+ if (!nouveau_fence_kick(fence))
+ return false;
+
+ do {
+ if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
+ if (debug && debug->debug_message)
+ pipe_debug_message(debug, PERF_INFO,
+ "stalled %.3f ms waiting for fence",
+ (os_time_get_nano() - start) / 1000000.f);
return true;
+ }
if (!spins)
NOUVEAU_DRV_STAT(screen, any_non_kernel_fence_sync_count, 1);
spins++;
@@ -218,6 +219,8 @@ nouveau_fence_wait(struct nouveau_fence *fence)
if (!(spins % 8)) /* donate a few cycles */
sched_yield();
#endif
+
+ nouveau_fence_update(screen, false);
} while (spins < NOUVEAU_FENCE_MAX_SPINS);
debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n",
@@ -249,3 +252,26 @@ nouveau_fence_unref_bo(void *data)
nouveau_bo_ref(NULL, &bo);
}
+
+bool
+nouveau_fence_work(struct nouveau_fence *fence,
+ void (*func)(void *), void *data)
+{
+ struct nouveau_fence_work *work;
+
+ if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
+ func(data);
+ return true;
+ }
+
+ work = CALLOC_STRUCT(nouveau_fence_work);
+ if (!work)
+ return false;
+ work->func = func;
+ work->data = data;
+ LIST_ADD(&work->list, &fence->work);
+ p_atomic_inc(&fence->work_count);
+ if (fence->work_count > 64)
+ nouveau_fence_kick(fence);
+ return true;
+}
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h b/src/gallium/drivers/nouveau/nouveau_fence.h
index 2efcab2172d..f10016da826 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -11,6 +11,8 @@
#define NOUVEAU_FENCE_STATE_FLUSHED 3
#define NOUVEAU_FENCE_STATE_SIGNALLED 4
+struct pipe_debug_callback;
+
struct nouveau_fence_work {
struct list_head list;
void (*func)(void *);
@@ -23,6 +25,7 @@ struct nouveau_fence {
int state;
int ref;
uint32_t sequence;
+ uint32_t work_count;
struct list_head work;
};
@@ -34,7 +37,7 @@ bool nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **,
bool nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
void nouveau_fence_update(struct nouveau_screen *, bool flushed);
void nouveau_fence_next(struct nouveau_screen *);
-bool nouveau_fence_wait(struct nouveau_fence *);
+bool nouveau_fence_wait(struct nouveau_fence *, struct pipe_debug_callback *);
bool nouveau_fence_signalled(struct nouveau_fence *);
void nouveau_fence_unref_bo(void *data); /* generic unref bo callback */
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 47603b0b7fd..a6065e45aaa 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -18,6 +18,7 @@
#include "nouveau_winsys.h"
#include "nouveau_screen.h"
+#include "nouveau_context.h"
#include "nouveau_fence.h"
#include "nouveau_mm.h"
#include "nouveau_buffer.h"
@@ -75,7 +76,7 @@ nouveau_screen_fence_finish(struct pipe_screen *screen,
if (!timeout)
return nouveau_fence_signalled(nouveau_fence(pfence));
- return nouveau_fence_wait(nouveau_fence(pfence));
+ return nouveau_fence_wait(nouveau_fence(pfence), NULL);
}
@@ -238,3 +239,21 @@ nouveau_screen_fini(struct nouveau_screen *screen)
nouveau_device_del(&screen->device);
}
+
+static void
+nouveau_set_debug_callback(struct pipe_context *pipe,
+ const struct pipe_debug_callback *cb)
+{
+ struct nouveau_context *context = nouveau_context(pipe);
+
+ if (cb)
+ context->debug = *cb;
+ else
+ memset(&context->debug, 0, sizeof(context->debug));
+}
+
+void
+nouveau_context_init(struct nouveau_context *context)
+{
+ context->pipe.set_debug_callback = nouveau_set_debug_callback;
+}
diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video.c b/src/gallium/drivers/nouveau/nouveau_vp3_video.c
index f3a64b22c57..4652e56c49a 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.c
@@ -437,6 +437,7 @@ nouveau_vp3_screen_get_video_param(struct pipe_screen *pscreen,
/* VP3 does not support MPEG4, VP4+ do. */
return entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM &&
profile >= PIPE_VIDEO_PROFILE_MPEG1 &&
+ profile < PIPE_VIDEO_PROFILE_HEVC_MAIN &&
(!vp3 || codec != PIPE_VIDEO_FORMAT_MPEG4) &&
firmware_present(pscreen, profile);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.c b/src/gallium/drivers/nouveau/nv30/nv30_context.c
index a36fd57fae7..3ed088912e2 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.c
@@ -242,6 +242,7 @@ nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
if (debug_get_bool_option("NV30_SWTNL", false))
nv30->draw_flags |= NV30_NEW_SWTNL;
+ nouveau_context_init(&nv30->base);
nv30->sample_mask = 0xffff;
nv30_vbo_init(pipe);
nv30_query_init(pipe);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index bdecb0a32b3..154c3d370f7 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -173,6 +173,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -353,7 +354,7 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence)
*sequence = ++screen->base.fence.sequence;
- assert(PUSH_AVAIL(push) >= 3);
+ assert(PUSH_AVAIL(push) + push->rsvd_kick >= 3);
PUSH_DATA (push, NV30_3D_FENCE_OFFSET |
(2 /* size */ << 18) | (7 /* subchan */ << 13));
PUSH_DATA (push, 0);
@@ -383,7 +384,7 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
* _current_ one, and remove both.
*/
nouveau_fence_ref(screen->base.fence.current, &current);
- nouveau_fence_wait(current);
+ nouveau_fence_wait(current, NULL);
nouveau_fence_ref(NULL, &current);
nouveau_fence_ref(NULL, &screen->base.fence.current);
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 4108f48005e..7867c2df7f3 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -306,6 +306,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
}
nv50->base.pushbuf->kick_notify = nv50_default_kick_notify;
+ nouveau_context_init(&nv50->base);
nv50_init_query_functions(nv50);
nv50_init_surface_functions(nv50);
nv50_init_state_functions(nv50);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index 80f92be682d..49a93bf1d91 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -203,10 +203,8 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
-#if NOUVEAU_DRIVER != 0xc0
C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
-#endif
F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 299629b6438..89e7a338283 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -318,7 +318,8 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
}
bool
-nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
+nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
+ struct pipe_debug_callback *debug)
{
struct nv50_ir_prog_info *info;
int ret;
@@ -406,6 +407,11 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
prog->so = nv50_program_create_strmout_state(info,
&prog->pipe.stream_output);
+ pipe_debug_message(debug, SHADER_INFO,
+ "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
+ prog->type, info->bin.tlsSpace, prog->max_gpr,
+ info->bin.instructions, info->bin.codeSize);
+
out:
FREE(info);
return !ret;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 24cc96567d7..7a33eb11d6d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -106,7 +106,8 @@ struct nv50_program {
struct nv50_stream_output_state *so;
};
-bool nv50_program_translate(struct nv50_program *, uint16_t chipset);
+bool nv50_program_translate(struct nv50_program *, uint16_t chipset,
+ struct pipe_debug_callback *);
bool nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
index a46e622c597..b40370a1d78 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
@@ -151,4 +151,11 @@ nv50_surface_from_buffer(struct pipe_context *pipe,
void
nv50_surface_destroy(struct pipe_context *, struct pipe_surface *);
+void
+nv50_clear_texture(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned level,
+ const struct pipe_box *box,
+ const void *data);
+
#endif /* __NV50_RESOURCE_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index a9e0c478322..f47e998ab1e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -182,6 +182,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -350,7 +351,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
* _current_ one, and remove both.
*/
nouveau_fence_ref(screen->base.fence.current, &current);
- nouveau_fence_wait(current);
+ nouveau_fence_wait(current, NULL);
nouveau_fence_ref(NULL, &current);
nouveau_fence_ref(NULL, &screen->base.fence.current);
}
@@ -392,7 +393,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
/* we need to do it after possible flush in MARK_RING */
*sequence = ++screen->base.fence.sequence;
- assert(PUSH_AVAIL(push) >= 5);
+ assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
PUSH_DATAh(push, screen->fence.bo->offset);
PUSH_DATA (push, screen->fence.bo->offset);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 9b911043132..8e4b2b42bda 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -113,7 +113,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
{
if (!prog->translated) {
prog->translated = nv50_program_translate(
- prog, nv50->screen->base.device->chipset);
+ prog, nv50->screen->base.device->chipset, &nv50->base.debug);
if (!prog->translated)
return false;
} else
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 6c8c9f0b4e6..d27f12ca94b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -727,7 +727,8 @@ nv50_sp_state_create(struct pipe_context *pipe,
prog->pipe.stream_output = cso->stream_output;
prog->translated = nv50_program_translate(
- prog, nv50_context(pipe)->screen->base.device->chipset);
+ prog, nv50_context(pipe)->screen->base.device->chipset,
+ &nouveau_context(pipe)->debug);
return (void *)prog;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 237d76d6adb..916a7d44a31 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -27,6 +27,7 @@
#include "util/u_inlines.h"
#include "util/u_pack_color.h"
#include "util/u_format.h"
+#include "util/u_math.h"
#include "util/u_surface.h"
#include "tgsi/tgsi_ureg.h"
@@ -324,6 +325,9 @@ nv50_clear_render_target(struct pipe_context *pipe,
else
PUSH_DATA(push, 512);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+ PUSH_DATA (push, mt->ms_mode);
+
if (!nouveau_bo_memtype(bo)) {
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -404,6 +408,9 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
PUSH_DATA (push, 512);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+ PUSH_DATA (push, mt->ms_mode);
+
BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
PUSH_DATA (push, (width << 16) | dstx);
PUSH_DATA (push, (height << 16) | dsty);
@@ -418,6 +425,80 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
}
void
+nv50_clear_texture(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned level,
+ const struct pipe_box *box,
+ const void *data)
+{
+ struct pipe_surface tmpl = {{0}}, *sf;
+
+ tmpl.format = res->format;
+ tmpl.u.tex.first_layer = box->z;
+ tmpl.u.tex.last_layer = box->z + box->depth - 1;
+ tmpl.u.tex.level = level;
+ sf = pipe->create_surface(pipe, res, &tmpl);
+ if (!sf)
+ return;
+
+ if (util_format_is_depth_or_stencil(res->format)) {
+ float depth = 0;
+ uint8_t stencil = 0;
+ unsigned clear = 0;
+ const struct util_format_description *desc =
+ util_format_description(res->format);
+
+ if (util_format_has_depth(desc)) {
+ clear |= PIPE_CLEAR_DEPTH;
+ desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
+ }
+ if (util_format_has_stencil(desc)) {
+ clear |= PIPE_CLEAR_STENCIL;
+ desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
+ }
+ pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil,
+ box->x, box->y, box->width, box->height);
+ } else {
+ union pipe_color_union color;
+
+ switch (util_format_get_blocksizebits(res->format)) {
+ case 128:
+ sf->format = PIPE_FORMAT_R32G32B32A32_UINT;
+ memcpy(&color.ui, data, 128 / 8);
+ break;
+ case 64:
+ sf->format = PIPE_FORMAT_R32G32_UINT;
+ memcpy(&color.ui, data, 64 / 8);
+ memset(&color.ui[2], 0, 64 / 8);
+ break;
+ case 32:
+ sf->format = PIPE_FORMAT_R32_UINT;
+ memcpy(&color.ui, data, 32 / 8);
+ memset(&color.ui[1], 0, 96 / 8);
+ break;
+ case 16:
+ sf->format = PIPE_FORMAT_R16_UINT;
+ color.ui[0] = util_cpu_to_le32(
+ util_le16_to_cpu(*(unsigned short *)data));
+ memset(&color.ui[1], 0, 96 / 8);
+ break;
+ case 8:
+ sf->format = PIPE_FORMAT_R8_UINT;
+ color.ui[0] = util_cpu_to_le32(*(unsigned char *)data);
+ memset(&color.ui[1], 0, 96 / 8);
+ break;
+ default:
+ assert(!"Unknown texel element size");
+ return;
+ }
+
+ pipe->clear_render_target(pipe, sf, &color,
+ box->x, box->y, box->width, box->height);
+ }
+ pipe->surface_destroy(pipe, sf);
+}
+
+void
nv50_clear(struct pipe_context *pipe, unsigned buffers,
const union pipe_color_union *color,
double depth, unsigned stencil)
@@ -464,11 +545,9 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
if (mode) {
int zs_layers = 0, color0_layers = 0;
if (fb->cbufs[0] && (mode & 0x3c))
- color0_layers = fb->cbufs[0]->u.tex.last_layer -
- fb->cbufs[0]->u.tex.first_layer + 1;
+ color0_layers = nv50_surface(fb->cbufs[0])->depth;
if (fb->zsbuf && (mode & ~0x3c))
- zs_layers = fb->zsbuf->u.tex.last_layer -
- fb->zsbuf->u.tex.first_layer + 1;
+ zs_layers = nv50_surface(fb->zsbuf)->depth;
for (j = 0; j < MIN2(zs_layers, color0_layers); j++) {
BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1);
@@ -488,7 +567,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
struct pipe_surface *sf = fb->cbufs[i];
if (!sf || !(buffers & (PIPE_CLEAR_COLOR0 << i)))
continue;
- for (j = 0; j <= sf->u.tex.last_layer - sf->u.tex.first_layer; j++) {
+ for (j = 0; j < nv50_surface(sf)->depth; j++) {
BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1);
PUSH_DATA (push, (i << 6) | 0x3c |
(j << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
@@ -585,6 +664,8 @@ nv50_clear_buffer(struct pipe_context *pipe,
PUSH_DATA (push, height);
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+ PUSH_DATA (push, 0);
/* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
@@ -1593,6 +1674,7 @@ nv50_init_surface_functions(struct nv50_context *nv50)
pipe->resource_copy_region = nv50_resource_copy_region;
pipe->blit = nv50_blit;
pipe->flush_resource = nv50_flush_resource;
+ pipe->clear_texture = nv50_clear_texture;
pipe->clear_render_target = nv50_clear_render_target;
pipe->clear_depth_stencil = nv50_clear_depth_stencil;
pipe->clear_buffer = nv50_clear_buffer;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 9fa6fceeefa..9aa593f919e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -636,7 +636,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
* pushbuf submit, but it's probably not a big performance difference.
*/
if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
- nouveau_fence_wait(buf->fence_wr);
+ nouveau_fence_wait(buf->fence_wr, &nv50->base.debug);
while (instance_count--) {
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index e33af042620..2e7c790e9ee 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -120,7 +120,7 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0)
if (!prog->translated) {
prog->translated = nvc0_program_translate(
- prog, nvc0->screen->base.device->chipset);
+ prog, nvc0->screen->base.device->chipset, &nvc0->base.debug);
if (!prog->translated)
return false;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index f7604f11788..82ed5a1864e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -309,6 +309,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
pipe->memory_barrier = nvc0_memory_barrier;
pipe->get_sample_position = nvc0_context_get_sample_position;
+ nouveau_context_init(&nvc0->base);
nvc0_init_query_functions(nvc0);
nvc0_init_surface_functions(nvc0);
nvc0_init_state_functions(nvc0);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 4af83c53224..39b73ecb0c2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -224,7 +224,8 @@ void nvc0_default_kick_notify(struct nouveau_pushbuf *);
extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
/* nvc0_program.c */
-bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
+bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset,
+ struct pipe_debug_callback *);
bool nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_library_upload(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 68048f9d6c0..43d7c7b1123 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -517,7 +517,8 @@ nvc0_program_dump(struct nvc0_program *prog)
#endif
bool
-nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
+nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
+ struct pipe_debug_callback *debug)
{
struct nv50_ir_prog_info *info;
int ret;
@@ -639,6 +640,11 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
prog->tfb = nvc0_program_create_tfb_state(info,
&prog->pipe.stream_output);
+ pipe_debug_message(debug, SHADER_INFO,
+ "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
+ prog->type, info->bin.tlsSpace, prog->num_gprs,
+ info->bin.instructions, info->bin.codeSize);
+
out:
FREE(info);
return !ret;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 6ad3980911d..461fcaaf677 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -182,11 +182,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_COMPUTE:
- return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
+ return (class_3d <= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
@@ -245,7 +246,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 0;
break;
case PIPE_SHADER_COMPUTE:
- if (class_3d != NVE4_3D_CLASS)
+ if (class_3d > NVE4_3D_CLASS)
return 0;
break;
default:
@@ -415,7 +416,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
* _current_ one, and remove both.
*/
nouveau_fence_ref(screen->base.fence.current, &current);
- nouveau_fence_wait(current);
+ nouveau_fence_wait(current, NULL);
nouveau_fence_ref(NULL, &current);
nouveau_fence_ref(NULL, &screen->base.fence.current);
}
@@ -547,7 +548,7 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
/* we need to do it after possible flush in MARK_RING */
*sequence = ++screen->base.fence.sequence;
- assert(PUSH_AVAIL(push) >= 5);
+ assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
PUSH_DATAh(push, screen->fence.bo->offset);
PUSH_DATA (push, screen->fence.bo->offset);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 8595800592c..7e2e9992fe8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -72,7 +72,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
if (!prog->translated) {
prog->translated = nvc0_program_translate(
- prog, nvc0->screen->base.device->chipset);
+ prog, nvc0->screen->base.device->chipset, &nvc0->base.debug);
if (!prog->translated)
return false;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index ba1714da010..5dce5f0e65d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -681,7 +681,8 @@ nvc0_sp_state_create(struct pipe_context *pipe,
prog->pipe.stream_output = cso->stream_output;
prog->translated = nvc0_program_translate(
- prog, nvc0_context(pipe)->screen->base.device->chipset);
+ prog, nvc0_context(pipe)->screen->base.device->chipset,
+ &nouveau_context(pipe)->debug);
return (void *)prog;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index be123349148..cdb1fc1145f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -67,7 +67,7 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
case 1:
return NV50_SURFACE_FORMAT_R8_UNORM;
case 2:
- return NV50_SURFACE_FORMAT_R16_UNORM;
+ return NV50_SURFACE_FORMAT_RG8_UNORM;
case 4:
return NV50_SURFACE_FORMAT_BGRA8_UNORM;
case 8:
@@ -319,6 +319,7 @@ nvc0_clear_render_target(struct pipe_context *pipe,
PUSH_DATA(push, dst->u.tex.first_layer + sf->depth);
PUSH_DATA(push, mt->layer_stride >> 2);
PUSH_DATA(push, dst->u.tex.first_layer);
+ IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
} else {
if (res->base.target == PIPE_BUFFER) {
PUSH_DATA(push, 262144);
@@ -334,6 +335,7 @@ nvc0_clear_render_target(struct pipe_context *pipe,
PUSH_DATA(push, 0);
IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
/* tiled textures don't have to be fenced, they're not mapped directly */
nvc0_resource_fence(res, NOUVEAU_BO_WR);
@@ -466,6 +468,7 @@ nvc0_clear_buffer(struct pipe_context *pipe,
PUSH_DATA (push, 0);
IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
@@ -540,6 +543,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
PUSH_DATA (push, dst->u.tex.first_layer);
+ IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
@@ -1541,5 +1545,6 @@ nvc0_init_surface_functions(struct nvc0_context *nvc0)
pipe->flush_resource = nvc0_flush_resource;
pipe->clear_render_target = nvc0_clear_render_target;
pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
+ pipe->clear_texture = nv50_clear_texture;
pipe->clear_buffer = nvc0_clear_buffer;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index d459dd61c19..279c7e93cc8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -340,8 +340,8 @@ nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, unsigned usage)
return !nouveau_bo_wait(mt->base.bo, access, nvc0->base.client);
}
if (usage & PIPE_TRANSFER_WRITE)
- return !mt->base.fence || nouveau_fence_wait(mt->base.fence);
- return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr);
+ return !mt->base.fence || nouveau_fence_wait(mt->base.fence, &nvc0->base.debug);
+ return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr, &nvc0->base.debug);
}
void *
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index d5981248a86..606e25f915b 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -199,6 +199,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
/* SWTCL-only features. */
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 6f2b7ba0db3..5743e3fe538 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -346,7 +346,7 @@ static void evergreen_emit_direct_dispatch(
const uint *block_layout, const uint *grid_layout)
{
int i;
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
unsigned num_waves;
unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
@@ -417,12 +417,12 @@ static void evergreen_emit_direct_dispatch(
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
const uint *grid_layout)
{
- struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
unsigned i;
/* make sure that the gfx ring is only one active */
- if (ctx->b.rings.dma.cs && ctx->b.rings.dma.cs->cdw) {
- ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) {
+ ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
/* Initialize all the compute-related registers.
@@ -439,7 +439,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
/* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
- unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx,
+ unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx,
(struct r600_resource*)cb->base.texture,
RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_RW_BUFFER);
@@ -538,7 +538,7 @@ void evergreen_emit_cs_shader(
struct r600_cs_shader_state *state =
(struct r600_cs_shader_state*)atom;
struct r600_pipe_compute *shader = state->shader;
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint64_t va;
struct r600_resource *code_bo;
unsigned ngpr, nstack;
@@ -564,7 +564,7 @@ void evergreen_emit_cs_shader(
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
code_bo, RADEON_USAGE_READ,
RADEON_PRIO_USER_SHADER));
}
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 89abe92cbb4..a0f46800403 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -35,7 +35,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
uint64_t src_offset,
uint64_t size)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs;
+ struct radeon_winsys_cs *cs = rctx->b.dma.cs;
unsigned i, ncopy, csize, sub_cmd, shift;
struct r600_resource *rdst = (struct r600_resource*)dst;
struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -64,9 +64,9 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
for (i = 0; i < ncopy; i++) {
csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
/* emit reloc before writing cs so that cs is always in consistent state */
- radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ,
RADEON_PRIO_SDMA_BUFFER);
- radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE,
RADEON_PRIO_SDMA_BUFFER);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
@@ -86,7 +86,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
struct pipe_resource *dst, uint64_t offset,
unsigned size, uint32_t clear_value)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
assert(size);
assert(rctx->screen->b.has_cp_dma);
@@ -129,7 +129,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
}
/* This must be done after r600_need_cs_space. */
- reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+ reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
(struct r600_resource*)dst, RADEON_USAGE_WRITE,
RADEON_PRIO_CP_DMA);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index c6702a9ca34..684eee7a355 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -666,6 +666,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
enum pipe_format pipe_format = state->format;
struct radeon_surf_level *surflevel;
unsigned base_level, first_level, last_level;
+ unsigned dim, last_layer;
uint64_t va;
if (view == NULL)
@@ -679,7 +680,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
view->base.reference.count = 1;
view->base.context = ctx;
- if (texture->target == PIPE_BUFFER)
+ if (state->target == PIPE_BUFFER)
return texture_buffer_sampler_view(rctx, view, width0, height0);
swizzle[0] = state->swizzle_r;
@@ -773,12 +774,12 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
}
nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
- if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
+ if (state->target == PIPE_TEXTURE_1D_ARRAY) {
height = 1;
depth = texture->array_size;
- } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
+ } else if (state->target == PIPE_TEXTURE_2D_ARRAY) {
depth = texture->array_size;
- } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
+ } else if (state->target == PIPE_TEXTURE_CUBE_ARRAY)
depth = texture->array_size / 6;
va = tmp->resource.gpu_address;
@@ -790,7 +791,13 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
view->is_stencil_sampler = true;
view->tex_resource = &tmp->resource;
- view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
+
+ /* array type views and views into array types need to use layer offset */
+ dim = state->target;
+ if (state->target != PIPE_TEXTURE_CUBE)
+ dim = MAX2(state->target, texture->target);
+
+ view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(dim, texture->nr_samples)) |
S_030000_PITCH((pitch / 8) - 1) |
S_030000_TEX_WIDTH(width - 1));
if (rscreen->b.chip_class == CAYMAN)
@@ -818,10 +825,14 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
view->tex_resource_words[3] = (surflevel[base_level].offset + va) >> 8;
}
+ last_layer = state->u.tex.last_layer;
+ if (state->target != texture->target && depth == 1) {
+ last_layer = state->u.tex.first_layer;
+ }
view->tex_resource_words[4] = (word4 |
S_030010_ENDIAN_SWAP(endian));
view->tex_resource_words[5] = S_030014_BASE_ARRAY(state->u.tex.first_layer) |
- S_030014_LAST_ARRAY(state->u.tex.last_layer);
+ S_030014_LAST_ARRAY(last_layer);
view->tex_resource_words[6] = S_030018_TILE_SPLIT(tile_split);
if (texture->nr_samples > 1) {
@@ -860,7 +871,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct pipe_clip_state *state = &rctx->clip_state.state;
radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
@@ -910,7 +921,7 @@ static void evergreen_set_scissor_states(struct pipe_context *ctx,
static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_scissor_state *rstate = &rctx->scissor;
struct pipe_scissor_state *state;
uint32_t dirty_mask;
@@ -1514,7 +1525,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx,
static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
unsigned max_dist = 0;
switch (nr_samples) {
@@ -1555,7 +1566,7 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples,
static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
unsigned nr_cbufs = state->nr_cbufs;
unsigned i, tl, br;
@@ -1580,7 +1591,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
tex = (struct r600_texture *)cb->base.texture;
reloc = radeon_add_to_buffer_list(&rctx->b,
- &rctx->b.rings.gfx,
+ &rctx->b.gfx,
(struct r600_resource*)cb->base.texture,
RADEON_USAGE_READWRITE,
tex->surface.nsamples > 1 ?
@@ -1588,7 +1599,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
RADEON_PRIO_COLOR_BUFFER);
if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
- cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+ cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
tex->cmask_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_CMASK);
} else {
@@ -1634,7 +1645,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
if (!rctx->keep_tiling_flags) {
unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
- &rctx->b.rings.gfx,
+ &rctx->b.gfx,
(struct r600_resource*)state->cbufs[0]->texture,
RADEON_USAGE_READWRITE,
RADEON_PRIO_COLOR_BUFFER);
@@ -1657,7 +1668,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
if (state->zsbuf) {
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
- &rctx->b.rings.gfx,
+ &rctx->b.gfx,
(struct r600_resource*)state->zsbuf->texture,
RADEON_USAGE_READWRITE,
zb->base.texture->nr_samples > 1 ?
@@ -1719,7 +1730,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
float offset_units = state->offset_units;
float offset_scale = state->offset_scale;
@@ -1746,7 +1757,7 @@ static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600
static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
@@ -1761,7 +1772,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_db_state *a = (struct r600_db_state*)atom;
if (a->rsurf && a->rsurf->db_htile_surface) {
@@ -1772,7 +1783,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
- reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
+ reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rtex->htile_buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = reloc_idx;
@@ -1784,7 +1795,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
unsigned db_render_control = 0;
unsigned db_count_control = 0;
@@ -1851,7 +1862,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
unsigned resource_offset,
unsigned pkt_flags)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
while (dirty_mask) {
@@ -1886,7 +1897,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
}
state->dirty_mask = 0;
@@ -1910,7 +1921,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
unsigned reg_alu_const_cache,
unsigned pkt_flags)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
while (dirty_mask) {
@@ -1934,7 +1945,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
}
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
@@ -1959,7 +1970,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
dirty_mask &= ~(1 << buffer_index);
@@ -2007,7 +2018,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
struct r600_samplerview_state *state,
unsigned resource_id_base, unsigned pkt_flags)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
while (dirty_mask) {
@@ -2022,7 +2033,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
radeon_emit(cs, (resource_id_base + resource_index) * 8);
radeon_emit_array(cs, rview->tex_resource_words, 8);
- reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
+ reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource,
RADEON_USAGE_READ,
r600_get_sampler_view_priority(rview->tex_resource));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
@@ -2066,7 +2077,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
unsigned border_index_reg,
unsigned pkt_flags)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint32_t dirty_mask = texinfo->states.dirty_mask;
while (dirty_mask) {
@@ -2119,14 +2130,14 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at
struct r600_sample_mask *s = (struct r600_sample_mask*)a;
uint8_t mask = s->sample_mask;
- radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C3C_PA_SC_AA_MASK,
+ radeon_set_context_reg(rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK,
mask | (mask << 8) | (mask << 16) | (mask << 24));
}
static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
{
struct r600_sample_mask *s = (struct r600_sample_mask*)a;
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint16_t mask = s->sample_mask;
radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
@@ -2136,21 +2147,21 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom
static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_cso_state *state = (struct r600_cso_state*)a;
struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
(shader->buffer->gpu_address + shader->offset) >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer,
RADEON_USAGE_READ,
RADEON_PRIO_INTERNAL_SHADER));
}
static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
uint32_t v = 0, v2 = 0, primid = 0;
@@ -2189,7 +2200,7 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_
static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
struct r600_resource *rbuffer;
@@ -2202,7 +2213,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
rbuffer->gpu_address >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
RADEON_USAGE_READWRITE,
RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
@@ -2212,7 +2223,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
rbuffer->gpu_address >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
RADEON_USAGE_READWRITE,
RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
@@ -2362,6 +2373,8 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+ r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+ r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
/* to avoid GPU doing any preloading of constant from random address */
@@ -2801,6 +2814,8 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+ r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+ r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
/* to avoid GPU doing any preloading of constant from random address */
@@ -2940,6 +2955,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export);
db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);
+ switch (rshader->ps_conservative_z) {
+ default: /* fall through */
+ case TGSI_FS_DEPTH_LAYOUT_ANY:
+ db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z);
+ break;
+ case TGSI_FS_DEPTH_LAYOUT_GREATER:
+ db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
+ break;
+ case TGSI_FS_DEPTH_LAYOUT_LESS:
+ db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
+ break;
+ }
+
exports_ps = 0;
for (i = 0; i < rshader->noutput; i++) {
if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
@@ -3246,7 +3274,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
unsigned pitch,
unsigned bpp)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs;
+ struct radeon_winsys_cs *cs = rctx->b.dma.cs;
struct r600_texture *rsrc = (struct r600_texture*)src;
struct r600_texture *rdst = (struct r600_texture*)dst;
unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
@@ -3334,9 +3362,9 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
}
size = (cheight * pitch) / 4;
/* emit reloc before writing cs so that cs is always in consistent state */
- radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource,
RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
- radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource,
RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size);
cs->buf[cs->cdw++] = base >> 8;
@@ -3371,7 +3399,7 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
unsigned src_x, src_y;
unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
- if (rctx->b.rings.dma.cs == NULL) {
+ if (rctx->b.dma.cs == NULL) {
goto fallback;
}
@@ -3515,6 +3543,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
+ r600_add_atom(rctx, &rctx->b.render_cond_atom, id++);
r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 937ffcbddb9..25237c6f650 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -815,6 +815,13 @@
#define V_02880C_EXPORT_DB_FOUR16 0x01
#define V_02880C_EXPORT_DB_TWO 0x02
#define S_02880C_ALPHA_TO_MASK_DISABLE(x) (((x) & 0x1) << 12)
+#define S_02880C_CONSERVATIVE_Z_EXPORT(x) (((x) & 0x03) << 16)
+#define G_02880C_CONSERVATIVE_Z_EXPORT(x) (((x) >> 16) & 0x03)
+#define C_02880C_CONSERVATIVE_Z_EXPORT 0xFFFCFFFF
+#define V_02880C_EXPORT_ANY_Z 0
+#define V_02880C_EXPORT_LESS_THAN_Z 1
+#define V_02880C_EXPORT_GREATER_THAN_Z 2
+#define V_02880C_EXPORT_RESERVED 3
#define R_028A00_PA_SU_POINT_SIZE 0x028A00
#define S_028A00_HEIGHT(x) (((x) & 0xFFFF) << 0)
@@ -1497,6 +1504,7 @@
#define S_028878_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28)
#define G_028878_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1)
#define C_028878_UNCACHED_FIRST_INST 0xEFFFFFFF
+#define R_02887C_SQ_PGM_RESOURCES_2_GS 0x02887C
#define R_028890_SQ_PGM_RESOURCES_ES 0x028890
#define S_028890_NUM_GPRS(x) (((x) & 0xFF) << 0)
@@ -1511,6 +1519,7 @@
#define S_028890_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28)
#define G_028890_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1)
#define C_028890_UNCACHED_FIRST_INST 0xEFFFFFFF
+#define R_028894_SQ_PGM_RESOURCES_2_ES 0x028894
#define R_028864_SQ_PGM_RESOURCES_2_VS 0x028864
#define S_028864_SINGLE_ROUND(x) (((x) & 0x3) << 0)
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index aede8408446..8a90489318e 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -87,18 +87,16 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
(struct pipe_sampler_view**)rctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
}
- if ((op & R600_DISABLE_RENDER_COND) && rctx->b.current_render_cond) {
- util_blitter_save_render_condition(rctx->blitter,
- rctx->b.current_render_cond,
- rctx->b.current_render_cond_cond,
- rctx->b.current_render_cond_mode);
- }
+ if (op & R600_DISABLE_RENDER_COND)
+ rctx->b.render_cond_force_off = true;
}
static void r600_blitter_end(struct pipe_context *ctx)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- r600_resume_nontimer_queries(&rctx->b);
+
+ rctx->b.render_cond_force_off = false;
+ r600_resume_nontimer_queries(&rctx->b);
}
static unsigned u_max_sample(struct pipe_resource *r)
@@ -527,7 +525,7 @@ static void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst
* Can we somehow flush the index buffer cache? Starting a new IB seems
* to do the trick. */
if (rctx->b.chip_class <= R700)
- rctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ rctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
/**
@@ -604,6 +602,7 @@ static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *ds
} else {
uint32_t *map = r600_buffer_map_sync_with_rings(&rctx->b, r600_resource(dst),
PIPE_TRANSFER_WRITE);
+ map += offset / 4;
size /= 4;
for (unsigned i = 0; i < size; i++)
*map++ = value;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 6f11366e606..6409f0bd9f7 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -33,11 +33,16 @@
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
boolean count_draw_in)
{
+ struct radeon_winsys_cs *dma = ctx->b.dma.cs;
- if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
+ /* Flush the DMA IB if it's not empty. */
+ if (dma && dma->cdw)
+ ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+
+ if (!ctx->b.ws->cs_memory_below_limit(ctx->b.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
ctx->b.gtt = 0;
ctx->b.vram = 0;
- ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
return;
}
/* all will be accounted once relocation are emited */
@@ -45,7 +50,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
ctx->b.vram = 0;
/* The number of dwords we already used in the CS so far. */
- num_dw += ctx->b.rings.gfx.cs->cdw;
+ num_dw += ctx->b.gfx.cs->cdw;
if (count_draw_in) {
uint64_t mask;
@@ -75,11 +80,6 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
num_dw += ctx->b.streamout.num_dw_for_end;
}
- /* Count in render_condition(NULL) at the end of CS. */
- if (ctx->b.predicate_drawing) {
- num_dw += 3;
- }
-
/* SX_MISC */
if (ctx->b.chip_class == R600) {
num_dw += 3;
@@ -92,14 +92,14 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
num_dw += 10;
/* Flush if there's not enough space. */
- if (num_dw > ctx->b.rings.gfx.cs->max_dw) {
- ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ if (num_dw > ctx->b.gfx.cs->max_dw) {
+ ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
}
void r600_flush_emit(struct r600_context *rctx)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
unsigned cp_coher_cntl = 0;
unsigned wait_until = 0;
@@ -246,13 +246,11 @@ void r600_context_gfx_flush(void *context, unsigned flags,
struct pipe_fence_handle **fence)
{
struct r600_context *ctx = context;
- struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
if (cs->cdw == ctx->b.initial_gfx_cs_size && !fence)
return;
- ctx->b.rings.gfx.flushing = true;
-
r600_preflush_suspend_features(&ctx->b);
/* flush the framebuffer cache */
@@ -278,7 +276,6 @@ void r600_context_gfx_flush(void *context, unsigned flags,
/* Flush the CS. */
ctx->b.ws->cs_flush(cs, flags, fence, ctx->screen->b.cs_count++);
- ctx->b.rings.gfx.flushing = false;
r600_begin_new_cs(ctx);
}
@@ -292,7 +289,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
ctx->b.vram = 0;
/* Begin a new CS. */
- r600_emit_command_buffer(ctx->b.rings.gfx.cs, &ctx->start_cs_cmd);
+ r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd);
/* Re-emit states. */
r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
@@ -326,6 +323,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
}
r600_mark_atom_dirty(ctx, &ctx->vertex_shader.atom);
r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
+ r600_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
if (ctx->blend_state.cso)
r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
@@ -361,7 +359,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
ctx->last_primitive_type = -1;
ctx->last_start_instance = -1;
- ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
+ ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->cdw;
}
/* The max number of bytes to copy per packet. */
@@ -372,7 +370,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
struct pipe_resource *src, uint64_t src_offset,
unsigned size)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
assert(size);
assert(rctx->screen->b.has_cp_dma);
@@ -418,9 +416,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
}
/* This must be done after r600_need_cs_space. */
- src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
+ src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)src,
RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
- dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
+ dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)dst,
RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
@@ -453,7 +451,7 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
uint64_t src_offset,
uint64_t size)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs;
+ struct radeon_winsys_cs *cs = rctx->b.dma.cs;
unsigned i, ncopy, csize;
struct r600_resource *rdst = (struct r600_resource*)dst;
struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -471,9 +469,9 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
for (i = 0; i < ncopy; i++) {
csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
/* emit reloc before writing cs so that cs is always in consistent state */
- radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ,
RADEON_PRIO_SDMA_BUFFER);
- radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE,
RADEON_PRIO_SDMA_BUFFER);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize);
cs->buf[cs->cdw++] = dst_offset & 0xfffffffc;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 9f4cda2c142..bd00dcb642c 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -178,11 +178,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen,
goto fail;
}
- rctx->b.rings.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
- r600_context_gfx_flush, rctx,
- rscreen->b.trace_bo ?
- rscreen->b.trace_bo->cs_buf : NULL);
- rctx->b.rings.gfx.flush = r600_context_gfx_flush;
+ rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
+ r600_context_gfx_flush, rctx,
+ rscreen->b.trace_bo ?
+ rscreen->b.trace_bo->cs_buf : NULL);
+ rctx->b.gfx.flush = r600_context_gfx_flush;
rctx->allocator_fetch_shader = u_suballocator_create(&rctx->b.b, 64 * 1024, 256,
0, PIPE_USAGE_DEFAULT, FALSE);
@@ -323,6 +323,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
return family >= CHIP_CEDAR ? 1 : 0;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return family >= CHIP_CEDAR ? 4 : 0;
@@ -338,13 +339,13 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
- case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 520b03f605d..bbb55adef82 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -38,7 +38,7 @@
#include "tgsi/tgsi_scan.h"
-#define R600_NUM_ATOMS 42
+#define R600_NUM_ATOMS 43
#define R600_MAX_VIEWPORTS 16
@@ -116,6 +116,7 @@ struct r600_db_misc_state {
unsigned log_samples;
unsigned db_shader_control;
bool htile_clear;
+ uint8_t ps_conservative_z;
};
struct r600_cb_misc_state {
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index fc6335ae8bc..560197c82b5 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2044,6 +2044,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+ shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT];
if (shader->vs_as_gs_a)
vs_add_primid_output(&ctx, key.vs.prim_id_out);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index c240e7110c1..2040f732bf5 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -76,6 +76,8 @@ struct r600_shader {
boolean uses_tex_buffers;
boolean gs_prim_id_input;
+ uint8_t ps_conservative_z;
+
/* Size in bytes of a data item in the ring(s) (single vertex data).
Stages with only one ring items 123 will be set to 0. */
unsigned ring_item_sizes[4];
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 1be3e1b4de5..c2d4abc5ea1 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -244,7 +244,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
float offset_units = state->offset_units;
float offset_scale = state->offset_scale;
@@ -760,7 +760,7 @@ r600_create_sampler_view(struct pipe_context *ctx,
static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct pipe_clip_state *state = &rctx->clip_state.state;
radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
@@ -774,7 +774,7 @@ static void r600_set_polygon_stipple(struct pipe_context *ctx,
static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_scissor_state *rstate = &rctx->scissor;
struct pipe_scissor_state *state;
bool do_disable_workaround = false;
@@ -1334,7 +1334,7 @@ static void r600_get_sample_position(struct pipe_context *ctx,
static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
unsigned max_dist = 0;
if (rctx->b.family == CHIP_R600) {
@@ -1401,7 +1401,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
unsigned nr_cbufs = state->nr_cbufs;
struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0];
@@ -1432,7 +1432,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
radeon_set_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base);
reloc = radeon_add_to_buffer_list(&rctx->b,
- &rctx->b.rings.gfx,
+ &rctx->b.gfx,
(struct r600_resource*)cb[i]->base.texture,
RADEON_USAGE_READWRITE,
cb[i]->base.texture->nr_samples > 1 ?
@@ -1445,7 +1445,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
radeon_set_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask);
reloc = radeon_add_to_buffer_list(&rctx->b,
- &rctx->b.rings.gfx,
+ &rctx->b.gfx,
cb[i]->cb_buffer_fmask,
RADEON_USAGE_READWRITE,
cb[i]->base.texture->nr_samples > 1 ?
@@ -1458,7 +1458,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
radeon_set_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask);
reloc = radeon_add_to_buffer_list(&rctx->b,
- &rctx->b.rings.gfx,
+ &rctx->b.gfx,
cb[i]->cb_buffer_cmask,
RADEON_USAGE_READWRITE,
cb[i]->base.texture->nr_samples > 1 ?
@@ -1497,7 +1497,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
if (state->zsbuf) {
struct r600_surface *surf = (struct r600_surface*)state->zsbuf;
unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
- &rctx->b.rings.gfx,
+ &rctx->b.gfx,
(struct r600_resource*)state->zsbuf->texture,
RADEON_USAGE_READWRITE,
surf->base.texture->nr_samples > 1 ?
@@ -1570,7 +1570,7 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) {
@@ -1600,7 +1600,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_db_state *a = (struct r600_db_state*)atom;
if (a->rsurf && a->rsurf->db_htile_surface) {
@@ -1610,7 +1610,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
- reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
+ reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rtex->htile_buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = reloc_idx;
@@ -1621,13 +1621,28 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
unsigned db_render_control = 0;
unsigned db_render_override =
S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
+ if (rctx->b.chip_class >= R700) {
+ switch (a->ps_conservative_z) {
+ default: /* fall through */
+ case TGSI_FS_DEPTH_LAYOUT_ANY:
+ db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_ANY_Z);
+ break;
+ case TGSI_FS_DEPTH_LAYOUT_GREATER:
+ db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_GREATER_THAN_Z);
+ break;
+ case TGSI_FS_DEPTH_LAYOUT_LESS:
+ db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_LESS_THAN_Z);
+ break;
+ }
+ }
+
if (a->occlusion_query_enabled) {
if (rctx->b.chip_class >= R700) {
db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1);
@@ -1687,7 +1702,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_config_state *a = (struct r600_config_state*)atom;
radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
@@ -1696,7 +1711,7 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *
static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask;
while (dirty_mask) {
@@ -1725,7 +1740,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
}
}
@@ -1736,7 +1751,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
unsigned reg_alu_constbuf_size,
unsigned reg_alu_const_cache)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
while (dirty_mask) {
@@ -1758,7 +1773,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
}
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
@@ -1774,7 +1789,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
dirty_mask &= ~(1 << buffer_index);
@@ -1810,7 +1825,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
struct r600_samplerview_state *state,
unsigned resource_id_base)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
while (dirty_mask) {
@@ -1825,7 +1840,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
radeon_emit(cs, (resource_id_base + resource_index) * 7);
radeon_emit_array(cs, rview->tex_resource_words, 7);
- reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
+ reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource,
RADEON_USAGE_READ,
r600_get_sampler_view_priority(rview->tex_resource));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
@@ -1857,7 +1872,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx,
unsigned resource_id_base,
unsigned border_color_reg)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint32_t dirty_mask = texinfo->states.dirty_mask;
while (dirty_mask) {
@@ -1918,7 +1933,7 @@ static void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_a
static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
unsigned tmp;
tmp = S_009508_DISABLE_CUBE_ANISO(1) |
@@ -1936,26 +1951,26 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a
struct r600_sample_mask *s = (struct r600_sample_mask*)a;
uint8_t mask = s->sample_mask;
- radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C48_PA_SC_AA_MASK,
+ radeon_set_context_reg(rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK,
mask | (mask << 8) | (mask << 16) | (mask << 24));
}
static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_cso_state *state = (struct r600_cso_state*)a;
struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer,
RADEON_USAGE_READ,
RADEON_PRIO_INTERNAL_SHADER));
}
static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
uint32_t v2 = 0, primid = 0;
@@ -1990,7 +2005,7 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom
static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
struct r600_resource *rbuffer;
@@ -2002,7 +2017,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
RADEON_USAGE_READWRITE,
RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
@@ -2011,7 +2026,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
RADEON_USAGE_READWRITE,
RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
@@ -2787,6 +2802,7 @@ void r600_update_db_shader_control(struct r600_context * rctx)
{
bool dual_export;
unsigned db_shader_control;
+ uint8_t ps_conservative_z;
if (!rctx->ps_shader) {
return;
@@ -2798,6 +2814,8 @@ void r600_update_db_shader_control(struct r600_context * rctx)
db_shader_control = rctx->ps_shader->current->db_shader_control |
S_02880C_DUAL_EXPORT_ENABLE(dual_export);
+ ps_conservative_z = rctx->ps_shader->current->shader.ps_conservative_z;
+
/* When alpha test is enabled we can't trust the hw to make the proper
* decision on the order in which ztest should be run related to fragment
* shader execution.
@@ -2811,8 +2829,10 @@ void r600_update_db_shader_control(struct r600_context * rctx)
db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
}
- if (db_shader_control != rctx->db_misc_state.db_shader_control) {
+ if (db_shader_control != rctx->db_misc_state.db_shader_control ||
+ ps_conservative_z != rctx->db_misc_state.ps_conservative_z) {
rctx->db_misc_state.db_shader_control = db_shader_control;
+ rctx->db_misc_state.ps_conservative_z = ps_conservative_z;
r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -2845,7 +2865,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
unsigned pitch,
unsigned bpp)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs;
+ struct radeon_winsys_cs *cs = rctx->b.dma.cs;
struct r600_texture *rsrc = (struct r600_texture*)src;
struct r600_texture *rdst = (struct r600_texture*)dst;
unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
@@ -2918,9 +2938,9 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
cheight = cheight > copy_height ? copy_height : cheight;
size = (cheight * pitch) / 4;
/* emit reloc before writing cs so that cs is always in consistent state */
- radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource, RADEON_USAGE_READ,
RADEON_PRIO_SDMA_TEXTURE);
- radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource, RADEON_USAGE_WRITE,
RADEON_PRIO_SDMA_TEXTURE);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size);
cs->buf[cs->cdw++] = base >> 8;
@@ -2954,7 +2974,7 @@ static void r600_dma_copy(struct pipe_context *ctx,
unsigned src_x, src_y;
unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
- if (rctx->b.rings.dma.cs == NULL) {
+ if (rctx->b.dma.cs == NULL) {
goto fallback;
}
@@ -3086,6 +3106,7 @@ void r600_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
+ r600_add_atom(rctx, &rctx->b.render_cond_atom, id++);
r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 178005a8574..d629194ca6e 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -71,12 +71,12 @@ void r600_init_atom(struct r600_context *rctx,
void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom)
{
- r600_emit_command_buffer(rctx->b.rings.gfx.cs, ((struct r600_cso_state*)atom)->cb);
+ r600_emit_command_buffer(rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb);
}
void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom;
unsigned alpha_ref = a->sx_alpha_ref;
@@ -211,7 +211,7 @@ static void r600_set_blend_color(struct pipe_context *ctx,
void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct pipe_blend_color *state = &rctx->blend_color.state;
radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
@@ -223,7 +223,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_vgt_state *a = (struct r600_vgt_state *)atom;
radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
@@ -257,7 +257,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx,
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom;
radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
@@ -709,7 +709,7 @@ static void r600_set_viewport_states(struct pipe_context *ctx,
void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_viewport_state *rstate = &rctx->viewport;
struct pipe_viewport_state *state;
uint32_t dirty_mask;
@@ -1460,7 +1460,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_clip_misc_state *state = &rctx->clip_misc_state;
radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
@@ -1477,7 +1477,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
struct r600_context *rctx = (struct r600_context *)ctx;
struct pipe_draw_info info = *dinfo;
struct pipe_index_buffer ib = {};
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+ bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
uint64_t mask;
if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
@@ -1490,8 +1491,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
}
/* make sure that the gfx ring is only one active */
- if (rctx->b.rings.dma.cs && rctx->b.rings.dma.cs->cdw) {
- rctx->b.rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+ if (rctx->b.dma.cs && rctx->b.dma.cs->cdw) {
+ rctx->b.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
}
if (!r600_update_derived_state(rctx)) {
@@ -1663,7 +1664,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
/* Draw packets. */
if (!info.indirect) {
- cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, rctx->b.predicate_drawing);
+ cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, 0);
cs->buf[cs->cdw++] = info.instance_count;
}
@@ -1675,20 +1676,20 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
rctx->vgt_state.last_draw_was_indirect = true;
rctx->last_start_instance = -1;
- cs->buf[cs->cdw++] = PKT3(EG_PKT3_SET_BASE, 2, rctx->b.predicate_drawing);
+ cs->buf[cs->cdw++] = PKT3(EG_PKT3_SET_BASE, 2, 0);
cs->buf[cs->cdw++] = EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE;
cs->buf[cs->cdw++] = va;
cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
- cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
(struct r600_resource*)info.indirect,
RADEON_USAGE_READ,
RADEON_PRIO_DRAW_INDIRECT);
}
if (info.indexed) {
- cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, rctx->b.predicate_drawing);
+ cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, 0);
cs->buf[cs->cdw++] = ib.index_size == 4 ?
(VGT_INDEX_32 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_32_BIT : 0)) :
(VGT_INDEX_16 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_16_BIT : 0));
@@ -1696,7 +1697,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
if (ib.user_buffer) {
unsigned size_bytes = info.count*ib.index_size;
unsigned size_dw = align(size_bytes, 4) / 4;
- cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, rctx->b.predicate_drawing);
+ cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, render_cond_bit);
cs->buf[cs->cdw++] = info.count;
cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_IMMEDIATE;
memcpy(cs->buf+cs->cdw, ib.user_buffer, size_bytes);
@@ -1705,13 +1706,13 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
uint64_t va = r600_resource(ib.buffer)->gpu_address + ib.offset;
if (likely(!info.indirect)) {
- cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing);
+ cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, render_cond_bit);
cs->buf[cs->cdw++] = va;
cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
cs->buf[cs->cdw++] = info.count;
cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
- cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
(struct r600_resource*)ib.buffer,
RADEON_USAGE_READ,
RADEON_PRIO_INDEX_BUFFER);
@@ -1719,20 +1720,20 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
else {
uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size;
- cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BASE, 1, rctx->b.predicate_drawing);
+ cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BASE, 1, 0);
cs->buf[cs->cdw++] = va;
cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
- cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
(struct r600_resource*)ib.buffer,
RADEON_USAGE_READ,
RADEON_PRIO_INDEX_BUFFER);
- cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing);
+ cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, 0);
cs->buf[cs->cdw++] = max_size;
- cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, rctx->b.predicate_drawing);
+ cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit);
cs->buf[cs->cdw++] = info.indirect_offset;
cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
}
@@ -1752,17 +1753,17 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
cs->buf[cs->cdw++] = 0; /* unused */
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+ cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
t->buf_filled_size, RADEON_USAGE_READ,
RADEON_PRIO_SO_FILLED_SIZE);
}
if (likely(!info.indirect)) {
- cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing);
+ cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit);
cs->buf[cs->cdw++] = info.count;
}
else {
- cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDIRECT, 1, rctx->b.predicate_drawing);
+ cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit);
cs->buf[cs->cdw++] = info.indirect_offset;
}
cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |
@@ -1938,7 +1939,7 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader;
if (!shader)
@@ -1946,7 +1947,7 @@ void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
r600_emit_command_buffer(cs, &shader->command_buffer);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->bo,
RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER));
}
@@ -2669,12 +2670,12 @@ void r600_init_common_state_functions(struct r600_context *rctx)
void r600_trace_emit(struct r600_context *rctx)
{
struct r600_screen *rscreen = rctx->screen;
- struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint64_t va;
uint32_t reloc;
va = rscreen->b.trace_bo->gpu_address;
- reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
+ reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rscreen->b.trace_bo,
RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
radeon_emit(cs, va & 0xFFFFFFFFUL);
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 6bba88cb574..53f5ad6db6a 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -781,6 +781,14 @@
#define S_028D0C_COPY_CENTROID(x) (((x) & 0x1) << 7)
#define S_028D0C_COPY_SAMPLE(x) (((x) & 0x1) << 8)
#define S_028D0C_R700_PERFECT_ZPASS_COUNTS(x) (((x) & 0x1) << 15)
+#define S_028D0C_CONSERVATIVE_Z_EXPORT(x) (((x) & 0x03) << 13)
+#define G_028D0C_CONSERVATIVE_Z_EXPORT(x) (((x) >> 13) & 0x03)
+#define C_028D0C_CONSERVATIVE_Z_EXPORT 0xFFFF9FFF
+#define V_028D0C_EXPORT_ANY_Z 0
+#define V_028D0C_EXPORT_LESS_THAN_Z 1
+#define V_028D0C_EXPORT_GREATER_THAN_Z 2
+#define V_028D0C_EXPORT_RESERVED 3
+
#define R_028D10_DB_RENDER_OVERRIDE 0x028D10
#define V_028D10_FORCE_OFF 0
#define V_028D10_FORCE_ENABLE 1
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 0dc6c918331..c294e516408 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -34,11 +34,11 @@ boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage)
{
- if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
+ if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
return TRUE;
}
- if (ctx->rings.dma.cs && ctx->rings.dma.cs->cdw &&
- ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) {
+ if (ctx->dma.cs && ctx->dma.cs->cdw &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage)) {
return TRUE;
}
return FALSE;
@@ -60,26 +60,26 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
rusage = RADEON_USAGE_WRITE;
}
- if (ctx->rings.gfx.cs->cdw != ctx->initial_gfx_cs_size &&
- ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs,
+ if (ctx->gfx.cs->cdw != ctx->initial_gfx_cs_size &&
+ ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
resource->cs_buf, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
- ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
return NULL;
} else {
- ctx->rings.gfx.flush(ctx, 0, NULL);
+ ctx->gfx.flush(ctx, 0, NULL);
busy = true;
}
}
- if (ctx->rings.dma.cs &&
- ctx->rings.dma.cs->cdw &&
- ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs,
+ if (ctx->dma.cs &&
+ ctx->dma.cs->cdw &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
resource->cs_buf, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
- ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
return NULL;
} else {
- ctx->rings.dma.flush(ctx, 0, NULL);
+ ctx->dma.flush(ctx, 0, NULL);
busy = true;
}
}
@@ -90,9 +90,9 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
} else {
/* We will be wait for the GPU. Wait for any offloaded
* CS flush to complete to avoid busy-waiting in the winsys. */
- ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
- if (ctx->rings.dma.cs)
- ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
+ ctx->ws->cs_sync_flush(ctx->gfx.cs);
+ if (ctx->dma.cs)
+ ctx->ws->cs_sync_flush(ctx->dma.cs);
}
}
@@ -240,7 +240,7 @@ static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
return rctx->screen->has_cp_dma ||
- (dword_aligned && (rctx->rings.dma.cs ||
+ (dword_aligned && (rctx->dma.cs ||
rctx->screen->has_streamout));
}
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h
index b5a1dafb273..ad067ce4e76 100644
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -50,21 +50,6 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct
enum radeon_bo_priority priority)
{
assert(usage);
-
- /* Make sure that all previous rings are flushed so that everything
- * looks serialized from the driver point of view.
- */
- if (!ring->flushing) {
- if (ring == &rctx->rings.gfx) {
- if (rctx->rings.dma.cs) {
- /* flush dma ring */
- rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
- }
- } else {
- /* flush gfx ring */
- rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
- }
- }
return rctx->ws->cs_add_buffer(ring->cs, rbo->cs_buf, usage,
rbo->domains, priority) * 4;
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 0ad36849645..3599692a857 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -31,6 +31,7 @@
#include "util/u_memory.h"
#include "util/u_format_s3tc.h"
#include "util/u_upload_mgr.h"
+#include "os/os_time.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
#include "radeon/radeon_video.h"
@@ -40,6 +41,12 @@
#define HAVE_LLVM 0
#endif
+struct r600_multi_fence {
+ struct pipe_reference reference;
+ struct pipe_fence_handle *gfx;
+ struct pipe_fence_handle *sdma;
+};
+
/*
* pipe_context
*/
@@ -110,10 +117,14 @@ void r600_draw_rectangle(struct blitter_context *blitter,
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
{
+ /* Flush the GFX IB if it's not empty. */
+ if (ctx->gfx.cs->cdw > ctx->initial_gfx_cs_size)
+ ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+
/* Flush if there's not enough space. */
- if ((num_dw + ctx->rings.dma.cs->cdw) > ctx->rings.dma.cs->max_dw) {
- ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
- assert((num_dw + ctx->rings.dma.cs->cdw) <= ctx->rings.dma.cs->max_dw);
+ if ((num_dw + ctx->dma.cs->cdw) > ctx->dma.cs->max_dw) {
+ ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ assert((num_dw + ctx->dma.cs->cdw) <= ctx->dma.cs->max_dw);
}
}
@@ -123,17 +134,6 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
void r600_preflush_suspend_features(struct r600_common_context *ctx)
{
- /* Disable render condition. */
- ctx->saved_render_cond = NULL;
- ctx->saved_render_cond_cond = FALSE;
- ctx->saved_render_cond_mode = 0;
- if (ctx->current_render_cond) {
- ctx->saved_render_cond = ctx->current_render_cond;
- ctx->saved_render_cond_cond = ctx->current_render_cond_cond;
- ctx->saved_render_cond_mode = ctx->current_render_cond_mode;
- ctx->b.render_condition(&ctx->b, NULL, FALSE, 0);
- }
-
/* suspend queries */
ctx->queries_suspended_for_flush = false;
if (ctx->num_cs_dw_nontimer_queries_suspend) {
@@ -161,44 +161,52 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
r600_resume_nontimer_queries(ctx);
r600_resume_timer_queries(ctx);
}
-
- /* Re-enable render condition. */
- if (ctx->saved_render_cond) {
- ctx->b.render_condition(&ctx->b, ctx->saved_render_cond,
- ctx->saved_render_cond_cond,
- ctx->saved_render_cond_mode);
- }
}
static void r600_flush_from_st(struct pipe_context *ctx,
struct pipe_fence_handle **fence,
unsigned flags)
{
+ struct pipe_screen *screen = ctx->screen;
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
unsigned rflags = 0;
+ struct pipe_fence_handle *gfx_fence = NULL;
+ struct pipe_fence_handle *sdma_fence = NULL;
if (flags & PIPE_FLUSH_END_OF_FRAME)
rflags |= RADEON_FLUSH_END_OF_FRAME;
- if (rctx->rings.dma.cs) {
- rctx->rings.dma.flush(rctx, rflags, NULL);
+ if (rctx->dma.cs) {
+ rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
+ }
+ rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+
+ /* Both engines can signal out of order, so we need to keep both fences. */
+ if (gfx_fence || sdma_fence) {
+ struct r600_multi_fence *multi_fence =
+ CALLOC_STRUCT(r600_multi_fence);
+ if (!multi_fence)
+ return;
+
+ multi_fence->reference.count = 1;
+ multi_fence->gfx = gfx_fence;
+ multi_fence->sdma = sdma_fence;
+
+ screen->fence_reference(screen, fence, NULL);
+ *fence = (struct pipe_fence_handle*)multi_fence;
}
- rctx->rings.gfx.flush(rctx, rflags, fence);
}
static void r600_flush_dma_ring(void *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
+ struct radeon_winsys_cs *cs = rctx->dma.cs;
- if (!cs->cdw) {
- return;
- }
-
- rctx->rings.dma.flushing = true;
- rctx->ws->cs_flush(cs, flags, fence, 0);
- rctx->rings.dma.flushing = false;
+ if (cs->cdw)
+ rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence, 0);
+ if (fence)
+ rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
}
static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
@@ -270,10 +278,10 @@ bool r600_common_context_init(struct r600_common_context *rctx,
return false;
if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
- rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
- r600_flush_dma_ring,
- rctx, NULL);
- rctx->rings.dma.flush = r600_flush_dma_ring;
+ rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
+ r600_flush_dma_ring,
+ rctx, NULL);
+ rctx->dma.flush = r600_flush_dma_ring;
}
return true;
@@ -281,10 +289,10 @@ bool r600_common_context_init(struct r600_common_context *rctx,
void r600_common_context_cleanup(struct r600_common_context *rctx)
{
- if (rctx->rings.gfx.cs)
- rctx->ws->cs_destroy(rctx->rings.gfx.cs);
- if (rctx->rings.dma.cs)
- rctx->ws->cs_destroy(rctx->rings.dma.cs);
+ if (rctx->gfx.cs)
+ rctx->ws->cs_destroy(rctx->gfx.cs);
+ if (rctx->dma.cs)
+ rctx->ws->cs_destroy(rctx->dma.cs);
if (rctx->ctx)
rctx->ws->ctx_destroy(rctx->ctx);
@@ -297,6 +305,7 @@ void r600_common_context_cleanup(struct r600_common_context *rctx)
if (rctx->allocator_so_filled_size) {
u_suballocator_destroy(rctx->allocator_so_filled_size);
}
+ rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
}
void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
@@ -754,12 +763,19 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
}
static void r600_fence_reference(struct pipe_screen *screen,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *fence)
+ struct pipe_fence_handle **dst,
+ struct pipe_fence_handle *src)
{
- struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
-
- rws->fence_reference(ptr, fence);
+ struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws;
+ struct r600_multi_fence **rdst = (struct r600_multi_fence **)dst;
+ struct r600_multi_fence *rsrc = (struct r600_multi_fence *)src;
+
+ if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
+ ws->fence_reference(&(*rdst)->gfx, NULL);
+ ws->fence_reference(&(*rdst)->sdma, NULL);
+ FREE(*rdst);
+ }
+ *rdst = rsrc;
}
static boolean r600_fence_finish(struct pipe_screen *screen,
@@ -767,8 +783,24 @@ static boolean r600_fence_finish(struct pipe_screen *screen,
uint64_t timeout)
{
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
+ struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+ int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
+
+ if (rfence->sdma) {
+ if (!rws->fence_wait(rws, rfence->sdma, timeout))
+ return false;
+
+ /* Recompute the timeout after waiting. */
+ if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+ int64_t time = os_time_get_nano();
+ timeout = abs_timeout > time ? abs_timeout - time : 0;
+ }
+ }
+
+ if (!rfence->gfx)
+ return true;
- return rws->fence_wait(rws, fence, timeout);
+ return rws->fence_wait(rws, rfence->gfx, timeout);
}
static bool r600_interpret_tiling(struct r600_common_screen *rscreen,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index c300c0b3332..ebe633b9125 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -365,16 +365,10 @@ struct r600_streamout {
struct r600_ring {
struct radeon_winsys_cs *cs;
- bool flushing;
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence);
};
-struct r600_rings {
- struct r600_ring gfx;
- struct r600_ring dma;
-};
-
struct r600_common_context {
struct pipe_context b; /* base class */
@@ -383,7 +377,9 @@ struct r600_common_context {
struct radeon_winsys_ctx *ctx;
enum radeon_family family;
enum chip_class chip_class;
- struct r600_rings rings;
+ struct r600_ring gfx;
+ struct r600_ring dma;
+ struct pipe_fence_handle *last_sdma_fence;
unsigned initial_gfx_cs_size;
unsigned gpu_reset_counter;
@@ -421,14 +417,11 @@ struct r600_common_context {
unsigned num_draw_calls;
/* Render condition. */
- struct pipe_query *current_render_cond;
- unsigned current_render_cond_mode;
- boolean current_render_cond_cond;
- boolean predicate_drawing;
- /* For context flushing. */
- struct pipe_query *saved_render_cond;
- boolean saved_render_cond_cond;
- unsigned saved_render_cond_mode;
+ struct r600_atom render_cond_atom;
+ struct pipe_query *render_cond;
+ unsigned render_cond_mode;
+ boolean render_cond_invert;
+ bool render_cond_force_off; /* for u_blitter */
/* MSAA sample locations.
* The first index is the sample index.
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 9a5402583f4..8c2b601a96c 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -172,7 +172,7 @@ static unsigned event_type_for_stream(struct r600_query *query)
static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
{
- struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
uint64_t va;
r600_update_occlusion_query_state(ctx, query->type, 1);
@@ -225,7 +225,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
default:
assert(0);
}
- r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+ r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_QUERY);
if (r600_is_timer_query(query->type))
@@ -236,7 +236,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
{
- struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
uint64_t va;
/* The queries which need begin already called this in begin_query. */
@@ -287,7 +287,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
default:
assert(0);
}
- r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+ r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_QUERY);
query->buffer.results_end += query->result_size;
@@ -303,53 +303,60 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
r600_update_prims_generated_query_state(ctx, query->type, -1);
}
-static void r600_emit_query_predication(struct r600_common_context *ctx, struct r600_query *query,
- int operation, bool flag_wait)
+static void r600_emit_query_predication(struct r600_common_context *ctx,
+ struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
- uint32_t op = PRED_OP(operation);
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+ struct r600_query *query = (struct r600_query*)ctx->render_cond;
+ struct r600_query_buffer *qbuf;
+ uint32_t op;
+ bool flag_wait;
+
+ if (!query)
+ return;
+
+ flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+ ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
+
+ switch (query->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ op = PRED_OP(PREDICATION_OP_ZPASS);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
+ break;
+ default:
+ assert(0);
+ return;
+ }
/* if true then invert, see GL_ARB_conditional_render_inverted */
- if (ctx->current_render_cond_cond)
+ if (ctx->render_cond_invert)
op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visable/overflow */
else
op |= PREDICATION_DRAW_VISIBLE; /* Draw if visable/overflow */
- if (operation == PREDICATION_OP_CLEAR) {
- ctx->need_gfx_cs_space(&ctx->b, 3, FALSE);
-
- radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
- radeon_emit(cs, 0);
- radeon_emit(cs, PRED_OP(PREDICATION_OP_CLEAR));
- } else {
- struct r600_query_buffer *qbuf;
- unsigned count;
- /* Find how many results there are. */
- count = 0;
- for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
- count += qbuf->results_end / query->result_size;
- }
-
- ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);
-
- op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
-
- /* emit predicate packets for all data blocks */
- for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
- unsigned results_base = 0;
- uint64_t va = qbuf->buf->gpu_address;
-
- while (results_base < qbuf->results_end) {
- radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
- radeon_emit(cs, va + results_base);
- radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
- r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
- RADEON_PRIO_QUERY);
- results_base += query->result_size;
+ op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
- /* set CONTINUE bit for all packets except the first */
- op |= PREDICATION_CONTINUE;
- }
+ /* emit predicate packets for all data blocks */
+ for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+ unsigned results_base = 0;
+ uint64_t va = qbuf->buf->gpu_address;
+
+ while (results_base < qbuf->results_end) {
+ radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+ radeon_emit(cs, va + results_base);
+ radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
+ r600_emit_reloc(ctx, &ctx->gfx, qbuf->buf, RADEON_USAGE_READ,
+ RADEON_PRIO_QUERY);
+ results_base += query->result_size;
+
+ /* set CONTINUE bit for all packets except the first */
+ op |= PREDICATION_CONTINUE;
}
}
}
@@ -532,7 +539,7 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
case PIPE_QUERY_TIMESTAMP_DISJOINT:
return;
case PIPE_QUERY_GPU_FINISHED:
- rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, &rquery->fence);
+ ctx->flush(ctx, &rquery->fence, 0);
return;
case R600_QUERY_DRAW_CALLS:
rquery->end_result = rctx->num_draw_calls;
@@ -820,42 +827,20 @@ static void r600_render_condition(struct pipe_context *ctx,
uint mode)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct r600_query *rquery = (struct r600_query *)query;
- bool wait_flag = false;
-
- rctx->current_render_cond = query;
- rctx->current_render_cond_cond = condition;
- rctx->current_render_cond_mode = mode;
-
- if (query == NULL) {
- if (rctx->predicate_drawing) {
- rctx->predicate_drawing = false;
- r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
- }
- return;
- }
+ struct r600_query *rquery = (struct r600_query*)query;
+ struct r600_query_buffer *qbuf;
+ struct r600_atom *atom = &rctx->render_cond_atom;
- if (mode == PIPE_RENDER_COND_WAIT ||
- mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
- wait_flag = true;
- }
+ rctx->render_cond = query;
+ rctx->render_cond_invert = condition;
+ rctx->render_cond_mode = mode;
- rctx->predicate_drawing = true;
+ /* Compute the size of SET_PREDICATION packets. */
+ atom->num_dw = 0;
+ for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
+ atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
- switch (rquery->type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- case PIPE_QUERY_SO_STATISTICS:
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
- break;
- default:
- assert(0);
- }
+ rctx->set_atom_dirty(rctx, atom, query != NULL);
}
static void r600_suspend_queries(struct r600_common_context *ctx,
@@ -939,7 +924,7 @@ void r600_resume_timer_queries(struct r600_common_context *ctx)
/* Get backends mask */
void r600_query_init_backend_mask(struct r600_common_context *ctx)
{
- struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
struct r600_resource *buffer;
uint32_t *results;
unsigned num_backends = ctx->screen->info.r600_num_backends;
@@ -990,7 +975,7 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
radeon_emit(cs, buffer->gpu_address);
radeon_emit(cs, buffer->gpu_address >> 32);
- r600_emit_reloc(ctx, &ctx->rings.gfx, buffer,
+ r600_emit_reloc(ctx, &ctx->gfx, buffer,
RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
/* analyze results */
@@ -1024,6 +1009,7 @@ void r600_query_init(struct r600_common_context *rctx)
rctx->b.begin_query = r600_begin_query;
rctx->b.end_query = r600_end_query;
rctx->b.get_query_result = r600_get_query_result;
+ rctx->render_cond_atom.emit = r600_emit_query_predication;
if (((struct r600_common_screen*)rctx->b.screen)->info.r600_num_backends > 0)
rctx->b.render_condition = r600_render_condition;
diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c
index 33403b572af..e977ed9fa10 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -152,7 +152,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
{
- struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
unsigned reg_strmout_cntl;
/* The register is at different places on different ASICs. */
@@ -184,7 +184,7 @@ static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
unsigned *stride_in_dw = rctx->streamout.stride_in_dw;
unsigned i, update_flags = 0;
@@ -216,7 +216,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
radeon_emit(cs, va >> 8); /* BUFFER_BASE */
- r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
+ r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
/* R7xx requires this packet after updating BUFFER_BASE.
@@ -226,7 +226,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
radeon_emit(cs, i);
radeon_emit(cs, va >> 8);
- r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
+ r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
}
}
@@ -244,7 +244,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
radeon_emit(cs, va); /* src address lo */
radeon_emit(cs, va >> 32); /* src address hi */
- r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
+ r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
} else {
/* Start from the beginning. */
@@ -267,7 +267,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
void r600_emit_streamout_end(struct r600_common_context *rctx)
{
- struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
unsigned i;
uint64_t va;
@@ -288,7 +288,7 @@ void r600_emit_streamout_end(struct r600_common_context *rctx)
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
- r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
+ r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);
/* Zero the buffer size. The counters (primitives generated,
@@ -336,8 +336,8 @@ static void r600_emit_streamout_enable(struct r600_common_context *rctx,
S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
}
- radeon_set_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
- radeon_set_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
+ radeon_set_context_reg(rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+ radeon_set_context_reg(rctx->gfx.cs, strmout_config_reg, strmout_config_val);
}
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index edfdfe33187..3126cce8c22 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1324,7 +1324,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
{
int i;
- if (rctx->current_render_cond)
+ if (rctx->render_cond)
return;
for (i = 0; i < fb->nr_cbufs; i++) {
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 33b01361aa5..0c643e5cd59 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -947,6 +947,12 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
dec->msg->body.decode.width_in_samples = dec->base.width;
dec->msg->body.decode.height_in_samples = dec->base.height;
+ if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
+ (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
+ dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16;
+ dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16;
+ }
+
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
dec->msg->body.decode.db_pitch = dec->base.width;
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index 32bfc32073b..f56c6cf6cb4 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -244,8 +244,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
return codec != PIPE_VIDEO_FORMAT_MPEG4;
return true;
case PIPE_VIDEO_FORMAT_VC1:
- /* FIXME: VC-1 simple/main profile is broken */
- return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED;
+ return true;
case PIPE_VIDEO_FORMAT_HEVC:
/* Carrizo only supports HEVC Main */
return rscreen->family >= CHIP_CARRIZO &&
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index e53af1dd6b5..2de237b4716 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -50,7 +50,7 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx,
uint64_t src_offset,
uint64_t size)
{
- struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+ struct radeon_winsys_cs *cs = ctx->b.dma.cs;
unsigned i, ncopy, csize;
struct r600_resource *rdst = (struct r600_resource*)dst;
struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -61,9 +61,9 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx,
ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE;
r600_need_dma_space(&ctx->b, ncopy * 7);
- radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ,
RADEON_PRIO_SDMA_BUFFER);
- radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rdst, RADEON_USAGE_WRITE,
RADEON_PRIO_SDMA_BUFFER);
for (i = 0; i < ncopy; i++) {
@@ -112,7 +112,7 @@ static void cik_sdma_copy_tile(struct si_context *ctx,
unsigned pitch,
unsigned bpe)
{
- struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+ struct radeon_winsys_cs *cs = ctx->b.dma.cs;
struct si_screen *sscreen = ctx->screen;
struct r600_texture *rsrc = (struct r600_texture*)src;
struct r600_texture *rdst = (struct r600_texture*)dst;
@@ -171,9 +171,9 @@ static void cik_sdma_copy_tile(struct si_context *ctx,
ncopy = (copy_height + cheight - 1) / cheight;
r600_need_dma_space(&ctx->b, ncopy * 12);
- radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rsrc->resource,
RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
- radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rdst->resource,
RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
copy_height = size * 4 / pitch;
@@ -224,7 +224,7 @@ void cik_sdma_copy(struct pipe_context *ctx,
unsigned copy_height, y_align;
unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
- if (sctx->b.rings.dma.cs == NULL) {
+ if (sctx->b.dma.cs == NULL) {
goto fallback;
}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index fce014a1e6b..13d8e6f2a5f 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -29,20 +29,23 @@ enum si_blitter_op /* bitmask */
{
SI_SAVE_TEXTURES = 1,
SI_SAVE_FRAMEBUFFER = 2,
- SI_DISABLE_RENDER_COND = 4,
+ SI_SAVE_FRAGMENT_STATE = 4,
+ SI_DISABLE_RENDER_COND = 8,
- SI_CLEAR = 0,
+ SI_CLEAR = SI_SAVE_FRAGMENT_STATE,
- SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER,
+ SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
SI_COPY = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES |
- SI_DISABLE_RENDER_COND,
+ SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND,
- SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES,
+ SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES |
+ SI_SAVE_FRAGMENT_STATE,
- SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_DISABLE_RENDER_COND,
+ SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE |
+ SI_DISABLE_RENDER_COND,
- SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER
+ SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE
};
static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
@@ -51,22 +54,25 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
r600_suspend_nontimer_queries(&sctx->b);
- util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
- util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
- util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
- util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
- util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
- util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
+ util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
+ util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
+ util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
- util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
- util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
- util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
- util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
- util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
- util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
+ util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
(struct pipe_stream_output_target**)sctx->b.streamout.targets);
+ util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
+
+ if (op & SI_SAVE_FRAGMENT_STATE) {
+ util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
+ util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
+ util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
+ util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
+ util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
+ util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
+ util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
+ }
if (op & SI_SAVE_FRAMEBUFFER)
util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state);
@@ -80,17 +86,15 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
sctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
}
- if ((op & SI_DISABLE_RENDER_COND) && sctx->b.current_render_cond) {
- util_blitter_save_render_condition(sctx->blitter,
- sctx->b.current_render_cond,
- sctx->b.current_render_cond_cond,
- sctx->b.current_render_cond_mode);
- }
+ if (op & SI_DISABLE_RENDER_COND)
+ sctx->b.render_cond_force_off = true;
}
static void si_blitter_end(struct pipe_context *ctx)
{
struct si_context *sctx = (struct si_context *)ctx;
+
+ sctx->b.render_cond_force_off = false;
r600_resume_nontimer_queries(&sctx->b);
}
@@ -731,9 +735,69 @@ static void si_flush_resource(struct pipe_context *ctx,
}
}
+static void si_pipe_clear_buffer(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ unsigned offset, unsigned size,
+ const void *clear_value_ptr,
+ int clear_value_size)
+{
+ struct si_context *sctx = (struct si_context*)ctx;
+ uint32_t dword_value;
+ unsigned i;
+
+ assert(offset % clear_value_size == 0);
+ assert(size % clear_value_size == 0);
+
+ if (clear_value_size > 4) {
+ const uint32_t *u32 = clear_value_ptr;
+ bool clear_dword_duplicated = true;
+
+ /* See if we can lower large fills to dword fills. */
+ for (i = 1; i < clear_value_size / 4; i++)
+ if (u32[0] != u32[i]) {
+ clear_dword_duplicated = false;
+ break;
+ }
+
+ if (!clear_dword_duplicated) {
+ /* Use transform feedback for 64-bit, 96-bit, and
+ * 128-bit fills.
+ */
+ union pipe_color_union clear_value;
+
+ memcpy(&clear_value, clear_value_ptr, clear_value_size);
+ si_blitter_begin(ctx, SI_DISABLE_RENDER_COND);
+ util_blitter_clear_buffer(sctx->blitter, dst, offset,
+ size, clear_value_size / 4,
+ &clear_value);
+ si_blitter_end(ctx);
+ return;
+ }
+ }
+
+ /* Expand the clear value to a dword. */
+ switch (clear_value_size) {
+ case 1:
+ dword_value = *(uint8_t*)clear_value_ptr;
+ dword_value |= (dword_value << 8) |
+ (dword_value << 16) |
+ (dword_value << 24);
+ break;
+ case 2:
+ dword_value = *(uint16_t*)clear_value_ptr;
+ dword_value |= dword_value << 16;
+ break;
+ default:
+ dword_value = *(uint32_t*)clear_value_ptr;
+ }
+
+ sctx->b.clear_buffer(ctx, dst, offset, size, dword_value, false);
+}
+
void si_init_blit_functions(struct si_context *sctx)
{
sctx->b.b.clear = si_clear;
+ sctx->b.b.clear_buffer = si_pipe_clear_buffer;
sctx->b.b.clear_render_target = si_clear_render_target;
sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
sctx->b.b.resource_copy_region = si_resource_copy_region;
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 697e60a50d9..2d551dd0e6b 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -227,7 +227,7 @@ static void si_launch_grid(
uint32_t pc, const void *input)
{
struct si_context *sctx = (struct si_context*)ctx;
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_compute *program = sctx->cs_shader_state.program;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
struct r600_resource *input_buffer = program->input_buffer;
@@ -253,10 +253,10 @@ static void si_launch_grid(
radeon_emit(cs, 0x80000000);
radeon_emit(cs, 0x80000000);
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_INV_ICACHE |
- SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_INV_SMEM_L1 |
SI_CONTEXT_FLUSH_WITH_INV_L2 |
SI_CONTEXT_FLAG_COMPUTE;
si_emit_cache_flush(sctx, NULL);
@@ -274,7 +274,7 @@ static void si_launch_grid(
kernel_args_size = program->input_size + num_work_size_bytes + 8 /* For scratch va */;
kernel_args = sctx->b.ws->buffer_map(input_buffer->cs_buf,
- sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+ sctx->b.gfx.cs, PIPE_TRANSFER_WRITE);
for (i = 0; i < 3; i++) {
kernel_args[i] = grid_layout[i];
kernel_args[i + 3] = grid_layout[i] * block_layout[i];
@@ -294,7 +294,7 @@ static void si_launch_grid(
shader->scratch_bytes_per_wave *
num_waves_for_scratch);
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
shader->scratch_bo,
RADEON_USAGE_READWRITE,
RADEON_PRIO_SCRATCH_BUFFER);
@@ -310,7 +310,7 @@ static void si_launch_grid(
kernel_args_va = input_buffer->gpu_address;
kernel_args_va += kernel_args_offset;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, input_buffer,
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
@@ -338,7 +338,7 @@ static void si_launch_grid(
if (!buffer) {
continue;
}
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, buffer,
RADEON_USAGE_READWRITE,
RADEON_PRIO_COMPUTE_GLOBAL);
}
@@ -361,7 +361,7 @@ static void si_launch_grid(
#if HAVE_LLVM >= 0x0306
shader_va += pc;
#endif
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo,
RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
@@ -449,10 +449,10 @@ static void si_launch_grid(
si_pm4_free_state(sctx, pm4, ~0);
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
- SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_INV_ICACHE |
- SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_INV_SMEM_L1 |
SI_CONTEXT_FLAG_COMPUTE;
si_emit_cache_flush(sctx, NULL);
}
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index d4bd7b28cf3..0bf85a04db7 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -46,8 +46,9 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
uint64_t dst_va, uint64_t src_va,
unsigned size, unsigned flags)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
+ uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM(1) : 0;
uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
@@ -63,14 +64,14 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ radeon_emit(cs, size | wr_confirm | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
} else {
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ radeon_emit(cs, size | wr_confirm | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
}
}
@@ -79,8 +80,9 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
uint64_t dst_va, unsigned size,
uint32_t clear_value, unsigned flags)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
+ uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM(1) : 0;
uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
@@ -94,26 +96,74 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
radeon_emit(cs, 0);
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ radeon_emit(cs, size | wr_confirm | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
} else {
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
radeon_emit(cs, clear_value); /* DATA [31:0] */
radeon_emit(cs, sync_flag | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ radeon_emit(cs, size | wr_confirm | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
}
}
+static unsigned get_flush_flags(struct si_context *sctx, bool is_framebuffer)
+{
+ if (is_framebuffer)
+ return SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+
+ return SI_CONTEXT_INV_SMEM_L1 |
+ SI_CONTEXT_INV_VMEM_L1 |
+ (sctx->b.chip_class == SI ? SI_CONTEXT_INV_GLOBAL_L2 : 0);
+}
+
+static unsigned get_tc_l2_flag(struct si_context *sctx, bool is_framebuffer)
+{
+ return is_framebuffer || sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+}
+
+static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst,
+ struct pipe_resource *src, unsigned byte_count,
+ unsigned remaining_size, unsigned *flags)
+{
+ si_need_cs_space(sctx);
+
+ /* This must be done after need_cs_space. */
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ (struct r600_resource*)dst,
+ RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
+ if (src)
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ (struct r600_resource*)src,
+ RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
+
+ /* Flush the caches for the first copy only.
+ * Also wait for the previous CP DMA operations.
+ */
+ if (sctx->b.flags) {
+ si_emit_cache_flush(sctx, NULL);
+ *flags |= SI_CP_DMA_RAW_WAIT;
+ }
+
+ /* Do the synchronization after the last dma, so that all data
+ * is written to memory.
+ */
+ if (byte_count == remaining_size)
+ *flags |= R600_CP_DMA_SYNC;
+}
+
+/* Alignment for optimal performance. */
+#define CP_DMA_ALIGNMENT 32
/* The max number of bytes to copy per packet. */
-#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - CP_DMA_ALIGNMENT)
static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer)
{
struct si_context *sctx = (struct si_context*)ctx;
- unsigned flush_flags, tc_l2_flag;
+ unsigned tc_l2_flag = get_tc_l2_flag(sctx, is_framebuffer);
+ unsigned flush_flags = get_flush_flags(sctx, is_framebuffer);
if (!size)
return;
@@ -126,52 +176,27 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
/* Fallback for unaligned clears. */
if (offset % 4 != 0 || size % 4 != 0) {
- uint32_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
- sctx->b.rings.gfx.cs,
- PIPE_TRANSFER_WRITE);
- size /= 4;
- for (unsigned i = 0; i < size; i++)
- *map++ = value;
+ uint8_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
+ sctx->b.gfx.cs,
+ PIPE_TRANSFER_WRITE);
+ map += offset;
+ for (unsigned i = 0; i < size; i++) {
+ unsigned byte_within_dword = (offset + i) % 4;
+ *map++ = (value >> (byte_within_dword * 8)) & 0xff;
+ }
return;
}
uint64_t va = r600_resource(dst)->gpu_address + offset;
- /* Flush the caches where the resource is bound. */
- if (is_framebuffer) {
- flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
- tc_l2_flag = 0;
- } else {
- flush_flags = SI_CONTEXT_INV_TC_L1 |
- (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
- SI_CONTEXT_INV_KCACHE;
- tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
- }
-
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- flush_flags;
+ /* Flush the caches. */
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
while (size) {
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
unsigned dma_flags = tc_l2_flag;
- si_need_cs_space(sctx);
-
- /* This must be done after need_cs_space. */
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
- (struct r600_resource*)dst, RADEON_USAGE_WRITE,
- RADEON_PRIO_CP_DMA);
-
- /* Flush the caches for the first copy only.
- * Also wait for the previous CP DMA operations. */
- if (sctx->b.flags) {
- si_emit_cache_flush(sctx, NULL);
- dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
- }
-
- /* Do the synchronization after the last copy, so that all data is written to memory. */
- if (size == byte_count)
- dma_flags |= R600_CP_DMA_SYNC;
+ si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, &dma_flags);
/* Emit the clear packet. */
si_emit_cp_dma_clear_buffer(sctx, va, byte_count, value, dma_flags);
@@ -188,12 +213,53 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
r600_resource(dst)->TC_L2_dirty = true;
}
+/**
+ * Realign the CP DMA engine. This must be done after a copy with an unaligned
+ * size.
+ *
+ * \param size Remaining size to the CP DMA alignment.
+ */
+static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size)
+{
+ uint64_t va;
+ unsigned dma_flags = 0;
+ unsigned scratch_size = CP_DMA_ALIGNMENT * 2;
+
+ assert(size < CP_DMA_ALIGNMENT);
+
+ /* Use the scratch buffer as the dummy buffer. The 3D engine should be
+ * idle at this point.
+ */
+ if (!sctx->scratch_buffer ||
+ sctx->scratch_buffer->b.b.width0 < scratch_size) {
+ r600_resource_reference(&sctx->scratch_buffer, NULL);
+ sctx->scratch_buffer =
+ si_resource_create_custom(&sctx->screen->b.b,
+ PIPE_USAGE_DEFAULT,
+ scratch_size);
+ if (!sctx->scratch_buffer)
+ return;
+ sctx->emit_scratch_reloc = true;
+ }
+
+ si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,
+ &sctx->scratch_buffer->b.b, size, size, &dma_flags);
+
+ va = sctx->scratch_buffer->gpu_address;
+ si_emit_cp_dma_copy_buffer(sctx, va, va + CP_DMA_ALIGNMENT, size,
+ dma_flags);
+}
+
void si_copy_buffer(struct si_context *sctx,
struct pipe_resource *dst, struct pipe_resource *src,
uint64_t dst_offset, uint64_t src_offset, unsigned size,
bool is_framebuffer)
{
- unsigned flush_flags, tc_l2_flag;
+ uint64_t main_dst_offset, main_src_offset;
+ unsigned skipped_size = 0;
+ unsigned realign_size = 0;
+ unsigned tc_l2_flag = get_tc_l2_flag(sctx, is_framebuffer);
+ unsigned flush_flags = get_flush_flags(sctx, is_framebuffer);
if (!size)
return;
@@ -207,50 +273,63 @@ void si_copy_buffer(struct si_context *sctx,
dst_offset += r600_resource(dst)->gpu_address;
src_offset += r600_resource(src)->gpu_address;
- /* Flush the caches where the resource is bound. */
- if (is_framebuffer) {
- flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
- tc_l2_flag = 0;
- } else {
- flush_flags = SI_CONTEXT_INV_TC_L1 |
- (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
- SI_CONTEXT_INV_KCACHE;
- tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+ /* If the size is not aligned, we must add a dummy copy at the end
+ * just to align the internal counter. Otherwise, the DMA engine
+ * would slow down by an order of magnitude for following copies.
+ */
+ if (size % CP_DMA_ALIGNMENT)
+ realign_size = CP_DMA_ALIGNMENT - (size % CP_DMA_ALIGNMENT);
+
+ /* If the copy begins unaligned, we must start copying from the next
+ * aligned block and the skipped part should be copied after everything
+ * else has been copied. Only the src alignment matters, not dst.
+ */
+ if (src_offset % CP_DMA_ALIGNMENT) {
+ skipped_size = CP_DMA_ALIGNMENT - (src_offset % CP_DMA_ALIGNMENT);
+ /* The main part will be skipped if the size is too small. */
+ skipped_size = MIN2(skipped_size, size);
+ size -= skipped_size;
}
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- flush_flags;
+ /* Flush the caches. */
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
+
+ /* This is the main part doing the copying. Src is always aligned. */
+ main_dst_offset = dst_offset + skipped_size;
+ main_src_offset = src_offset + skipped_size;
while (size) {
- unsigned sync_flags = tc_l2_flag;
+ unsigned dma_flags = tc_l2_flag;
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
- si_need_cs_space(sctx);
+ si_cp_dma_prepare(sctx, dst, src, byte_count,
+ size + skipped_size + realign_size,
+ &dma_flags);
- /* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
- if (sctx->b.flags) {
- si_emit_cache_flush(sctx, NULL);
- sync_flags |= SI_CP_DMA_RAW_WAIT;
- }
+ si_emit_cp_dma_copy_buffer(sctx, main_dst_offset, main_src_offset,
+ byte_count, dma_flags);
- /* Do the synchronization after the last copy, so that all data is written to memory. */
- if (size == byte_count) {
- sync_flags |= R600_CP_DMA_SYNC;
- }
+ size -= byte_count;
+ main_src_offset += byte_count;
+ main_dst_offset += byte_count;
+ }
- /* This must be done after r600_need_cs_space. */
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
- RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
- RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
+ /* Copy the part we skipped because src wasn't aligned. */
+ if (skipped_size) {
+ unsigned dma_flags = tc_l2_flag;
- si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
+ si_cp_dma_prepare(sctx, dst, src, skipped_size,
+ skipped_size + realign_size,
+ &dma_flags);
- size -= byte_count;
- src_offset += byte_count;
- dst_offset += byte_count;
+ si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset,
+ skipped_size, dma_flags);
}
+ /* Finally, realign the engine if the size wasn't aligned. */
+ if (realign_size)
+ si_cp_dma_realign_engine(sctx, realign_size);
+
/* Flush the caches again in case the 3D engine has been prefetching
* the resource. */
sctx->b.flags |= flush_flags;
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a8ff6f27319..3fa3a9bbd6e 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -117,7 +117,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
desc->list_dirty = false;
@@ -152,14 +152,14 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
if (!rview->resource)
continue;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
rview->resource, RADEON_USAGE_READ,
r600_get_sampler_view_priority(rview->resource));
}
if (!views->desc.buffer)
return;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, views->desc.buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
}
@@ -177,12 +177,12 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
(struct si_sampler_view*)view;
if (rview->resource)
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
rview->resource, RADEON_USAGE_READ,
r600_get_sampler_view_priority(rview->resource));
if (rview->dcc_buffer && rview->dcc_buffer != rview->resource)
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
rview->dcc_buffer, RADEON_USAGE_READ,
RADEON_PRIO_DCC);
@@ -264,7 +264,7 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
{
if (!states->desc.buffer)
return;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, states->desc.buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
}
@@ -334,14 +334,14 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
while (mask) {
int i = u_bit_scan64(&mask);
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource*)buffers->buffers[i],
buffers->shader_usage, buffers->priority);
}
if (!buffers->desc.buffer)
return;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
buffers->desc.buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_DESCRIPTORS);
}
@@ -362,14 +362,14 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
if (!sctx->vertex_buffer[vb].buffer)
continue;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource*)sctx->vertex_buffer[vb].buffer,
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
}
if (!desc->buffer)
return;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_DESCRIPTORS);
}
@@ -396,7 +396,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
if (!desc->buffer)
return false;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_DESCRIPTORS);
@@ -440,7 +440,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
desc[3] = sctx->vertex_elements->rsrc_word3[i];
if (!bound[ve->vertex_buffer_index]) {
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource*)vb->buffer,
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
bound[ve->vertex_buffer_index] = true;
@@ -525,7 +525,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
buffers->buffers[slot] = buffer;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource*)buffer,
buffers->shader_usage, buffers->priority);
buffers->desc.enabled_mask |= 1llu << slot;
@@ -620,7 +620,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
S_008F0C_ADD_TID_ENABLE(add_tid);
pipe_resource_reference(&buffers->buffers[slot], buffer);
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource*)buffer,
buffers->shader_usage, buffers->priority);
buffers->desc.enabled_mask |= 1llu << slot;
@@ -670,8 +670,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
* VS_PARTIAL_FLUSH is required if the buffers are going to be
* used as an input immediately.
*/
- sctx->b.flags |= SI_CONTEXT_INV_KCACHE |
- SI_CONTEXT_INV_TC_L1 |
+ sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
+ SI_CONTEXT_INV_VMEM_L1 |
SI_CONTEXT_VS_PARTIAL_FLUSH;
}
@@ -710,7 +710,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
/* Set the resource. */
pipe_resource_reference(&buffers->buffers[bufidx],
buffer);
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource*)buffer,
buffers->shader_usage, buffers->priority);
buffers->desc.enabled_mask |= 1llu << bufidx;
@@ -809,7 +809,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
old_va, buf);
buffers->desc.list_dirty = true;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
rbuffer, buffers->shader_usage,
buffers->priority);
@@ -838,7 +838,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
old_va, buf);
buffers->desc.list_dirty = true;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
rbuffer, buffers->shader_usage,
buffers->priority);
}
@@ -863,7 +863,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
old_va, buf);
views->desc.list_dirty = true;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
rbuffer, RADEON_USAGE_READ,
RADEON_PRIO_SAMPLER_BUFFER);
}
@@ -948,7 +948,7 @@ static void si_emit_shader_pointer(struct si_context *sctx,
struct si_descriptors *desc,
unsigned sh_base, bool keep_dirty)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
uint64_t va;
if (!desc->pointer_dirty || !desc->buffer)
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 581e89f42d8..240d96190a9 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -49,7 +49,7 @@ static void si_dma_copy_buffer(struct si_context *ctx,
uint64_t src_offset,
uint64_t size)
{
- struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+ struct radeon_winsys_cs *cs = ctx->b.dma.cs;
unsigned i, ncopy, csize, max_csize, sub_cmd, shift;
struct r600_resource *rdst = (struct r600_resource*)dst;
struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -78,9 +78,9 @@ static void si_dma_copy_buffer(struct si_context *ctx,
r600_need_dma_space(&ctx->b, ncopy * 5);
- radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ,
RADEON_PRIO_SDMA_BUFFER);
- radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rdst, RADEON_USAGE_WRITE,
RADEON_PRIO_SDMA_BUFFER);
for (i = 0; i < ncopy; i++) {
@@ -111,7 +111,7 @@ static void si_dma_copy_tile(struct si_context *ctx,
unsigned pitch,
unsigned bpp)
{
- struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+ struct radeon_winsys_cs *cs = ctx->b.dma.cs;
struct si_screen *sscreen = ctx->screen;
struct r600_texture *rsrc = (struct r600_texture*)src;
struct r600_texture *rdst = (struct r600_texture*)dst;
@@ -177,9 +177,9 @@ static void si_dma_copy_tile(struct si_context *ctx,
ncopy = (size / SI_DMA_COPY_MAX_SIZE_DW) + !!(size % SI_DMA_COPY_MAX_SIZE_DW);
r600_need_dma_space(&ctx->b, ncopy * 9);
- radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rsrc->resource,
RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
- radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rdst->resource,
RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
for (i = 0; i < ncopy; i++) {
@@ -221,7 +221,7 @@ void si_dma_copy(struct pipe_context *ctx,
unsigned src_x, src_y;
unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
- if (sctx->b.rings.dma.cs == NULL) {
+ if (sctx->b.dma.cs == NULL) {
goto fallback;
}
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 7c147e2e44c..baa02293c41 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -29,17 +29,22 @@
/* initialize */
void si_need_cs_space(struct si_context *ctx)
{
- struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+ struct radeon_winsys_cs *dma = ctx->b.dma.cs;
+
+ /* Flush the DMA IB if it's not empty. */
+ if (dma && dma->cdw)
+ ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
/* There are two memory usage counters in the winsys for all buffers
* that have been added (cs_add_buffer) and two counters in the pipe
* driver for those that haven't been added yet.
*/
- if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs,
+ if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.gfx.cs,
ctx->b.vram, ctx->b.gtt))) {
ctx->b.gtt = 0;
ctx->b.vram = 0;
- ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
return;
}
ctx->b.gtt = 0;
@@ -49,32 +54,36 @@ void si_need_cs_space(struct si_context *ctx)
* and just flush if there is not enough space left.
*/
if (unlikely(cs->cdw > cs->max_dw - 2048))
- ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
void si_context_gfx_flush(void *context, unsigned flags,
struct pipe_fence_handle **fence)
{
struct si_context *ctx = context;
- struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
struct radeon_winsys *ws = ctx->b.ws;
+ if (ctx->gfx_flush_in_progress)
+ return;
+
+ ctx->gfx_flush_in_progress = true;
+
if (cs->cdw == ctx->b.initial_gfx_cs_size &&
(!fence || ctx->last_gfx_fence)) {
if (fence)
ws->fence_reference(fence, ctx->last_gfx_fence);
if (!(flags & RADEON_FLUSH_ASYNC))
ws->cs_sync_flush(cs);
+ ctx->gfx_flush_in_progress = false;
return;
}
- ctx->b.rings.gfx.flushing = true;
-
r600_preflush_suspend_features(&ctx->b);
ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
- SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
/* this is probably not needed anymore */
SI_CONTEXT_PS_PARTIAL_FLUSH;
si_emit_cache_flush(ctx, NULL);
@@ -111,7 +120,6 @@ void si_context_gfx_flush(void *context, unsigned flags,
/* Flush the CS. */
ws->cs_flush(cs, flags, &ctx->last_gfx_fence,
ctx->screen->b.cs_count++);
- ctx->b.rings.gfx.flushing = false;
if (fence)
ws->fence_reference(fence, ctx->last_gfx_fence);
@@ -121,6 +129,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
si_check_vm_faults(ctx);
si_begin_new_cs(ctx);
+ ctx->gfx_flush_in_progress = false;
}
void si_begin_new_cs(struct si_context *ctx)
@@ -144,9 +153,9 @@ void si_begin_new_cs(struct si_context *ctx)
/* Flush read caches at the beginning of CS. */
ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
- SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
- SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_INV_SMEM_L1 |
SI_CONTEXT_INV_ICACHE;
/* set all valid group as dirty so they get reemited on
@@ -156,6 +165,8 @@ void si_begin_new_cs(struct si_context *ctx)
/* The CS initialization should be emitted before everything else. */
si_pm4_emit(ctx, ctx->init_config);
+ if (ctx->init_config_gs_rings)
+ si_pm4_emit(ctx, ctx->init_config_gs_rings);
ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
ctx->framebuffer.dirty_zsbuf = true;
@@ -173,6 +184,7 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->spi_map);
si_mark_atom_dirty(ctx, &ctx->spi_ps_input);
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
+ si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
si_all_descriptors_begin_new_cs(ctx);
ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
@@ -182,7 +194,7 @@ void si_begin_new_cs(struct si_context *ctx)
r600_postflush_resume_features(&ctx->b);
- ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
+ ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->cdw;
/* Invalidate various draw states so that they are emitted before
* the first draw call. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 60baad3d13c..9a0fe808ee3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -50,6 +50,8 @@ static void si_destroy_context(struct pipe_context *context)
sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
si_pm4_free_state(sctx, sctx->init_config, ~0);
+ if (sctx->init_config_gs_rings)
+ si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0);
for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
@@ -139,10 +141,10 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
sctx->b.b.create_video_buffer = vl_video_buffer_create;
}
- sctx->b.rings.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
- sctx, sscreen->b.trace_bo ?
- sscreen->b.trace_bo->cs_buf : NULL);
- sctx->b.rings.gfx.flush = si_context_gfx_flush;
+ sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
+ sctx, sscreen->b.trace_bo ?
+ sscreen->b.trace_bo->cs_buf : NULL);
+ sctx->b.gfx.flush = si_context_gfx_flush;
/* Border colors. */
sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
@@ -337,6 +339,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_VERTEXID_NOBASE:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 42cd8803c36..05d52fe19dc 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -46,15 +46,12 @@
/* Instruction cache. */
#define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
-/* Cache used by scalar memory (SMEM) instructions. They also use TC
- * as a second level cache, which isn't flushed by this.
- * Other names: constant cache, data cache, DCACHE */
-#define SI_CONTEXT_INV_KCACHE (R600_CONTEXT_PRIVATE_FLAG << 1)
-/* Caches used by vector memory (VMEM) instructions.
- * L1 can optionally be bypassed (GLC=1) and can only be used by shaders.
- * L2 is used by shaders and can be used by other blocks (CP, sDMA). */
-#define SI_CONTEXT_INV_TC_L1 (R600_CONTEXT_PRIVATE_FLAG << 2)
-#define SI_CONTEXT_INV_TC_L2 (R600_CONTEXT_PRIVATE_FLAG << 3)
+/* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
+#define SI_CONTEXT_INV_SMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 1)
+/* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
+#define SI_CONTEXT_INV_VMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 2)
+/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
+#define SI_CONTEXT_INV_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 3)
/* Framebuffer caches. */
#define SI_CONTEXT_FLUSH_AND_INV_CB_META (R600_CONTEXT_PRIVATE_FLAG << 4)
#define SI_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 5)
@@ -176,6 +173,7 @@ struct si_context {
struct pipe_fence_handle *last_gfx_fence;
struct si_shader_ctx_state fixed_func_tcs_shader;
LLVMTargetMachineRef tm;
+ bool gfx_flush_in_progress;
/* Atoms (direct states). */
union si_state_atoms atoms;
@@ -204,6 +202,7 @@ struct si_context {
/* Precomputed states. */
struct si_pm4_state *init_config;
+ struct si_pm4_state *init_config_gs_rings;
bool init_config_has_vgt_flush;
struct si_pm4_state *vgt_shader_config[4];
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index f16933c5f98..c4ef2e78c50 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -127,10 +127,10 @@ void si_pm4_free_state(struct si_context *sctx,
void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
for (int i = 0; i < state->nbo; ++i) {
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, state->bo[i],
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, state->bo[i],
state->bo_usage[i], state->bo_priority[i]);
}
@@ -139,7 +139,7 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
} else {
struct r600_resource *ib = state->indirect_buffer;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, ib,
RADEON_USAGE_READ,
RADEON_PRIO_IB2);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a119cbdc16c..354d0646b99 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -164,49 +164,6 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
}
/**
- * Given a semantic name and index of a parameter and a mask of used parameters
- * (inputs or outputs), return the index of the parameter in the list of all
- * used parameters.
- *
- * For example, assume this list of parameters:
- * POSITION, PSIZE, GENERIC0, GENERIC2
- * which has the mask:
- * 11000000000101
- * Then:
- * querying POSITION returns 0,
- * querying PSIZE returns 1,
- * querying GENERIC0 returns 2,
- * querying GENERIC2 returns 3.
- *
- * Which can be used as an offset to a parameter buffer in units of vec4s.
- */
-static int get_param_index(unsigned semantic_name, unsigned index,
- uint64_t mask)
-{
- unsigned unique_index = si_shader_io_get_unique_index(semantic_name, index);
- int i, param_index = 0;
-
- /* If not present... */
- if (!((1llu << unique_index) & mask))
- return -1;
-
- for (i = 0; mask; i++) {
- uint64_t bit = 1llu << i;
-
- if (bit & mask) {
- if (i == unique_index)
- return param_index;
-
- mask &= ~bit;
- param_index++;
- }
- }
-
- assert(!"unreachable");
- return -1;
-}
-
-/**
* Get the value of a shader input parameter and extract a bitfield.
*/
static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
@@ -775,6 +732,7 @@ static LLVMValueRef fetch_input_gs(
struct tgsi_shader_info *info = &shader->selector->info;
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
+ unsigned param;
if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
return get_primitive_id(bld_base, swizzle);
@@ -805,12 +763,10 @@ static LLVMValueRef fetch_input_gs(
vtx_offset_param),
4);
+ param = si_shader_io_get_unique_index(semantic_name, semantic_index);
args[0] = si_shader_ctx->esgs_ring;
args[1] = vtx_offset;
- args[2] = lp_build_const_int32(gallivm,
- (get_param_index(semantic_name, semantic_index,
- shader->selector->inputs_read) * 4 +
- swizzle) * 256);
+ args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
args[3] = uint->zero;
args[4] = uint->one; /* OFFEN */
args[5] = uint->zero; /* IDXEN */
@@ -2016,9 +1972,6 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
si_shader_ctx->param_es2gs_offset);
- uint64_t enabled_outputs = si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL ?
- es->key.tes.es_enabled_outputs :
- es->key.vs.es_enabled_outputs;
unsigned chan;
int i;
@@ -2031,11 +1984,8 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
continue;
- param_index = get_param_index(info->output_semantic_name[i],
- info->output_semantic_index[i],
- enabled_outputs);
- if (param_index < 0)
- continue;
+ param_index = si_shader_io_get_unique_index(info->output_semantic_name[i],
+ info->output_semantic_index[i]);
for (chan = 0; chan < 4; chan++) {
LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
@@ -4023,10 +3973,6 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
fprintf(f, !i ? "%u" : ", %u",
key->vs.instance_divisors[i]);
fprintf(f, "}\n");
-
- if (key->vs.as_es)
- fprintf(f, " es_enabled_outputs = 0x%"PRIx64"\n",
- key->vs.es_enabled_outputs);
fprintf(f, " as_es = %u\n", key->vs.as_es);
fprintf(f, " as_ls = %u\n", key->vs.as_ls);
fprintf(f, " export_prim_id = %u\n", key->vs.export_prim_id);
@@ -4037,9 +3983,6 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
break;
case PIPE_SHADER_TESS_EVAL:
- if (key->tes.as_es)
- fprintf(f, " es_enabled_outputs = 0x%"PRIx64"\n",
- key->tes.es_enabled_outputs);
fprintf(f, " as_es = %u\n", key->tes.as_es);
fprintf(f, " export_prim_id = %u\n", key->tes.export_prim_id);
break;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index fd5500c1ab3..3400a03d7bb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -26,14 +26,15 @@
* Christian König <[email protected]>
*/
-/* How linking tessellation shader inputs and outputs works.
+/* How linking shader inputs and outputs between vertex, tessellation, and
+ * geometry shaders works.
*
* Inputs and outputs between shaders are stored in a buffer. This buffer
* lives in LDS (typical case for tessellation), but it can also live
- * in memory. Each input or output has a fixed location within a vertex.
+ * in memory (ESGS). Each input or output has a fixed location within a vertex.
* The highest used input or output determines the stride between vertices.
*
- * Since tessellation is only enabled in the OpenGL core profile,
+ * Since GS and tessellation are only possible in the OpenGL core profile,
* only these semantics are valid for per-vertex data:
*
* Name Location
@@ -57,13 +58,11 @@
* That's how independent shaders agree on input and output locations.
* The si_shader_io_get_unique_index function assigns the locations.
*
- * Other required information for calculating the input and output addresses
- * like the vertex stride, the patch stride, and the offsets where per-vertex
- * and per-patch data start, is passed to the shader via user data SGPRs.
- * The offsets and strides are calculated at draw time and aren't available
- * at compile time.
- *
- * The same approach should be used for linking ES->GS in the future.
+ * For tessellation, other required information for calculating the input and
+ * output addresses like the vertex stride, the patch stride, and the offsets
+ * where per-vertex and per-patch data start, is passed to the shader via
+ * user data SGPRs. The offsets and strides are calculated at draw time and
+ * aren't available at compile time.
*/
#ifndef SI_SHADER_H
@@ -202,13 +201,16 @@ struct si_shader_selector {
bool forces_persample_interp_for_persp;
bool forces_persample_interp_for_linear;
+ unsigned esgs_itemsize;
+ unsigned gs_input_verts_per_prim;
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
unsigned gs_num_invocations;
- unsigned gsvs_itemsize;
+ unsigned max_gs_stream; /* count - 1 */
+ unsigned gsvs_vertex_size;
+ unsigned max_gsvs_emit_size;
/* masks of "get_unique_index" bits */
- uint64_t inputs_read;
uint64_t outputs_written;
uint32_t patch_outputs_written;
uint32_t ps_colors_written;
@@ -241,7 +243,6 @@ union si_shader_key {
/* Mask of "get_unique_index" bits - which outputs are read
* by the next stage (needed by ES).
* This describes how outputs are laid out in memory. */
- uint64_t es_enabled_outputs;
unsigned as_es:1; /* export shader */
unsigned as_ls:1; /* local shader */
unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
@@ -253,7 +254,6 @@ union si_shader_key {
/* Mask of "get_unique_index" bits - which outputs are read
* by the next stage (needed by ES).
* This describes how outputs are laid out in memory. */
- uint64_t es_enabled_outputs;
unsigned as_es:1; /* export shader */
unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
} tes; /* tessellation evaluation shader */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 18b64056bc7..93847d5ec2f 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -248,7 +248,7 @@ static unsigned si_pack_float_12p4(float x)
*/
static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_state_blend *blend = sctx->queued.named.blend;
uint32_t mask = 0, i;
@@ -265,7 +265,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
*
* Reproducible with Unigine Heaven 4.0 and drirc missing.
*/
- if (blend->dual_src_blend &&
+ if (blend && blend->dual_src_blend &&
sctx->ps_shader.cso &&
(sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3)
mask = 0;
@@ -454,7 +454,7 @@ static void si_set_blend_color(struct pipe_context *ctx,
static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
@@ -486,7 +486,7 @@ static void si_set_clip_state(struct pipe_context *ctx,
static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
@@ -496,7 +496,7 @@ static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct tgsi_shader_info *info = si_get_vs_info(sctx);
unsigned window_space =
info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
@@ -541,7 +541,7 @@ static void si_set_scissor_states(struct pipe_context *ctx,
static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct pipe_scissor_state *states = sctx->scissors.states;
unsigned mask = sctx->scissors.dirty_mask;
@@ -593,7 +593,7 @@ static void si_set_viewport_states(struct pipe_context *ctx,
static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct pipe_viewport_state *states = sctx->viewports.states;
unsigned mask = sctx->viewports.dirty_mask;
@@ -830,7 +830,7 @@ static void si_delete_rs_state(struct pipe_context *ctx, void *state)
*/
static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
@@ -989,7 +989,7 @@ static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned db_shader_control;
@@ -2125,8 +2125,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
* Flush all CB and DB caches here because all buffers can be used
* for write by both TC (with shader image stores) and CB/DB.
*/
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
/* Take the maximum of the old and new count. If the new count is lower,
@@ -2233,7 +2233,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
unsigned i, nr_cbufs = state->nr_cbufs;
struct r600_texture *tex = NULL;
@@ -2252,20 +2252,20 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
}
tex = (struct r600_texture *)cb->base.texture;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
&tex->resource, RADEON_USAGE_READWRITE,
tex->surface.nsamples > 1 ?
RADEON_PRIO_COLOR_BUFFER_MSAA :
RADEON_PRIO_COLOR_BUFFER);
if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
tex->cmask_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_CMASK);
}
if (tex->dcc_buffer && tex->dcc_buffer != &tex->resource) {
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
tex->dcc_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_DCC);
}
@@ -2305,14 +2305,14 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
&rtex->resource, RADEON_USAGE_READWRITE,
zb->base.texture->nr_samples > 1 ?
RADEON_PRIO_DEPTH_BUFFER_MSAA :
RADEON_PRIO_DEPTH_BUFFER);
if (zb->db_htile_data_base) {
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
rtex->htile_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_HTILE);
}
@@ -2354,7 +2354,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
static void si_emit_msaa_sample_locs(struct si_context *sctx,
struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned nr_samples = sctx->framebuffer.nr_samples;
cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples :
@@ -2363,7 +2363,7 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx,
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
sctx->ps_iter_samples,
@@ -2846,7 +2846,7 @@ static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned mask = sctx->sample_mask.sample_mask;
radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
@@ -3044,8 +3044,8 @@ static void si_texture_barrier(struct pipe_context *ctx)
{
struct si_context *sctx = (struct si_context *)ctx;
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_FLUSH_AND_INV_CB;
}
@@ -3069,6 +3069,7 @@ static void si_init_config(struct si_context *sctx);
void si_init_state_functions(struct si_context *sctx)
{
+ si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
@@ -3444,6 +3445,9 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
}
+ if (sctx->b.family == CHIP_STONEY)
+ si_pm4_set_reg(pm4, R_028754_SX_PS_DOWNCONVERT, 0);
+
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 8b9a311cd3f..f5ca661f8d7 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -110,6 +110,7 @@ union si_state_atoms {
struct {
/* The order matters. */
struct r600_atom *cache_flush;
+ struct r600_atom *render_cond;
struct r600_atom *streamout_begin;
struct r600_atom *streamout_enable; /* must be after streamout_begin */
struct r600_atom *framebuffer;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index cf0891a2ab7..753abc8c103 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -108,7 +108,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
const struct pipe_draw_info *info,
unsigned *num_patches)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_shader_ctx_state *ls = &sctx->vs_shader;
/* The TES pointer will only be used for sctx->last_tcs.
* It would be wrong to think that TCS = TES. */
@@ -353,7 +353,7 @@ static unsigned si_get_ls_hs_config(struct si_context *sctx,
static void si_emit_scratch_reloc(struct si_context *sctx)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
if (!sctx->emit_scratch_reloc)
return;
@@ -362,7 +362,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
sctx->spi_tmpring_size);
if (sctx->scratch_buffer) {
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
sctx->scratch_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_SCRATCH_BUFFER);
@@ -373,7 +373,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
/* rast_prim is the primitive type after GS. */
static void si_emit_rasterizer_prim_state(struct si_context *sctx)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned rast_prim = sctx->current_rast_prim;
struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer;
@@ -401,7 +401,7 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
static void si_emit_draw_registers(struct si_context *sctx,
const struct pipe_draw_info *info)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned prim = si_conv_pipe_prim(info->mode);
unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
@@ -455,8 +455,9 @@ static void si_emit_draw_packets(struct si_context *sctx,
const struct pipe_draw_info *info,
const struct pipe_index_buffer *ib)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
+ bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
if (info->count_from_stream_output) {
struct r600_so_target *t =
@@ -476,7 +477,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
radeon_emit(cs, 0); /* unused */
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
t->buf_filled_size, RADEON_USAGE_READ,
RADEON_PRIO_SO_FILLED_SIZE);
}
@@ -530,7 +531,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
} else {
si_invalidate_draw_sh_constants(sctx);
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource *)info->indirect,
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
}
@@ -540,7 +541,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
ib->index_size;
uint64_t index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource *)ib->buffer,
RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
@@ -563,7 +564,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
radeon_emit(cs, index_max_size);
- radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT, 3, sctx->b.predicate_drawing));
+ radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT, 3, render_cond_bit));
radeon_emit(cs, info->indirect_offset);
radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
@@ -571,7 +572,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
} else {
index_va += info->start * ib->index_size;
- radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, sctx->b.predicate_drawing));
+ radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, render_cond_bit));
radeon_emit(cs, index_max_size);
radeon_emit(cs, index_va);
radeon_emit(cs, (index_va >> 32UL) & 0xFF);
@@ -590,13 +591,13 @@ static void si_emit_draw_packets(struct si_context *sctx,
radeon_emit(cs, indirect_va);
radeon_emit(cs, indirect_va >> 32);
- radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT, 3, sctx->b.predicate_drawing));
+ radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT, 3, render_cond_bit));
radeon_emit(cs, info->indirect_offset);
radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX);
} else {
- radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, sctx->b.predicate_drawing));
+ radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
radeon_emit(cs, info->count);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
@@ -604,12 +605,10 @@ static void si_emit_draw_packets(struct si_context *sctx,
}
}
-#define BOTH_ICACHE_KCACHE (SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_KCACHE)
-
void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
{
struct r600_common_context *sctx = &si_ctx->b;
- struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->gfx.cs;
uint32_t cp_coher_cntl = 0;
uint32_t compute =
PKT3_SHADER_TYPE_S(!!(sctx->flags & SI_CONTEXT_FLAG_COMPUTE));
@@ -624,12 +623,12 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
if (sctx->flags & SI_CONTEXT_INV_ICACHE)
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
- if (sctx->flags & SI_CONTEXT_INV_KCACHE)
+ if (sctx->flags & SI_CONTEXT_INV_SMEM_L1)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
- if (sctx->flags & SI_CONTEXT_INV_TC_L1)
+ if (sctx->flags & SI_CONTEXT_INV_VMEM_L1)
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
- if (sctx->flags & SI_CONTEXT_INV_TC_L2) {
+ if (sctx->flags & SI_CONTEXT_INV_GLOBAL_L2) {
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
/* TODO: this might not be needed. */
@@ -843,7 +842,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
/* VI reads index buffers through TC L2. */
if (info->indexed && sctx->b.chip_class <= CIK &&
r600_resource(ib.buffer)->TC_L2_dirty) {
- sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
r600_resource(ib.buffer)->TC_L2_dirty = false;
}
@@ -909,10 +908,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
void si_trace_emit(struct si_context *sctx)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
sctx->trace_id++;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf,
RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4a3a04caa52..7f6511cf01b 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -33,6 +33,7 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "util/u_memory.h"
+#include "util/u_prim.h"
#include "util/u_simple_shaders.h"
static void si_set_tesseval_regs(struct si_shader *shader,
@@ -194,6 +195,8 @@ static void si_shader_es(struct si_shader *shader)
}
assert(num_sgprs <= 104);
+ si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+ shader->selector->esgs_itemsize / 4);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
@@ -209,32 +212,17 @@ static void si_shader_es(struct si_shader *shader)
si_set_tesseval_regs(shader, pm4);
}
-static unsigned si_gs_get_max_stream(struct si_shader *shader)
-{
- struct pipe_stream_output_info *so = &shader->selector->so;
- unsigned max_stream = 0, i;
-
- if (so->num_outputs == 0)
- return 0;
-
- for (i = 0; i < so->num_outputs; i++) {
- if (so->output[i].stream > max_stream)
- max_stream = so->output[i].stream;
- }
- return max_stream;
-}
-
static void si_shader_gs(struct si_shader *shader)
{
- unsigned gs_vert_itemsize = shader->selector->info.num_outputs * 16;
+ unsigned gs_vert_itemsize = shader->selector->gsvs_vertex_size;
unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
- unsigned gsvs_itemsize = (gs_vert_itemsize * gs_max_vert_out) >> 2;
+ unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
unsigned cut_mode;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
uint64_t va;
- unsigned max_stream = si_gs_get_max_stream(shader);
+ unsigned max_stream = shader->selector->max_gs_stream;
/* The GSVS_RING_ITEMSIZE register takes 15 bits */
assert(gsvs_itemsize < (1 << 15));
@@ -265,8 +253,6 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1));
si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * ((max_stream >= 3) ? 3 : 1));
- si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- util_bitcount64(shader->selector->inputs_read) * (16 >> 2));
si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
@@ -529,10 +515,8 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
if (sctx->tes_shader.cso)
key->vs.as_ls = 1;
- else if (sctx->gs_shader.cso) {
+ else if (sctx->gs_shader.cso)
key->vs.as_es = 1;
- key->vs.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
- }
if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
sctx->ps_shader.cso->info.uses_primid)
@@ -543,10 +527,9 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
break;
case PIPE_SHADER_TESS_EVAL:
- if (sctx->gs_shader.cso) {
+ if (sctx->gs_shader.cso)
key->tes.as_es = 1;
- key->tes.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
- } else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
+ else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
key->tes.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
@@ -713,25 +696,22 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
sel->gs_num_invocations =
sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
- sel->gsvs_itemsize = sel->info.num_outputs * 16 *
- sel->gs_max_out_vertices;
+ sel->gsvs_vertex_size = sel->info.num_outputs * 16;
+ sel->max_gsvs_emit_size = sel->gsvs_vertex_size *
+ sel->gs_max_out_vertices;
- for (i = 0; i < sel->info.num_inputs; i++) {
- unsigned name = sel->info.input_semantic_name[i];
- unsigned index = sel->info.input_semantic_index[i];
+ sel->max_gs_stream = 0;
+ for (i = 0; i < sel->so.num_outputs; i++)
+ sel->max_gs_stream = MAX2(sel->max_gs_stream,
+ sel->so.output[i].stream);
- switch (name) {
- case TGSI_SEMANTIC_PRIMID:
- break;
- default:
- sel->inputs_read |=
- 1llu << si_shader_io_get_unique_index(name, index);
- }
- }
+ sel->gs_input_verts_per_prim =
+ u_vertices_per_prim(sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
break;
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_TESS_CTRL:
+ case PIPE_SHADER_TESS_EVAL:
for (i = 0; i < sel->info.num_outputs; i++) {
unsigned name = sel->info.output_semantic_name[i];
unsigned index = sel->info.output_semantic_index[i];
@@ -748,6 +728,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
1llu << si_shader_io_get_unique_index(name, index);
}
}
+ sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
break;
case PIPE_SHADER_FRAGMENT:
for (i = 0; i < sel->info.num_outputs; i++) {
@@ -937,7 +918,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_shader *ps = sctx->ps_shader.current;
struct si_shader *vs = si_get_vs_state(sctx);
struct tgsi_shader_info *psinfo;
@@ -1009,7 +990,7 @@ bcolor:
static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_shader *ps = sctx->ps_shader.current;
unsigned input_ena;
@@ -1077,6 +1058,7 @@ static void si_init_config_add_vgt_flush(struct si_context *sctx)
if (sctx->init_config_has_vgt_flush)
return;
+ /* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */
si_pm4_cmd_begin(sctx->init_config, PKT3_EVENT_WRITE);
si_pm4_cmd_add(sctx->init_config, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
si_pm4_cmd_end(sctx->init_config, false);
@@ -1084,70 +1066,127 @@ static void si_init_config_add_vgt_flush(struct si_context *sctx)
}
/* Initialize state related to ESGS / GSVS ring buffers */
-static void si_init_gs_rings(struct si_context *sctx)
+static bool si_update_gs_ring_buffers(struct si_context *sctx)
{
- unsigned esgs_ring_size = 128 * 1024;
- unsigned gsvs_ring_size = 60 * 1024 * 1024;
+ struct si_shader_selector *es =
+ sctx->tes_shader.cso ? sctx->tes_shader.cso : sctx->vs_shader.cso;
+ struct si_shader_selector *gs = sctx->gs_shader.cso;
+ struct si_pm4_state *pm4;
- assert(!sctx->esgs_ring && !sctx->gsvs_ring);
+ /* Chip constants. */
+ unsigned num_se = sctx->screen->b.info.max_se;
+ unsigned wave_size = 64;
+ unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
+ unsigned gs_vertex_reuse = 16 * num_se; /* GS_VERTEX_REUSE register (per SE) */
+ unsigned alignment = 256 * num_se;
+ /* The maximum size is 63.999 MB per SE. */
+ unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
+
+ /* Calculate the minimum size. */
+ unsigned min_esgs_ring_size = align(es->esgs_itemsize * gs_vertex_reuse *
+ wave_size, alignment);
+
+ /* These are recommended sizes, not minimum sizes. */
+ unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
+ es->esgs_itemsize * gs->gs_input_verts_per_prim;
+ unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
+ gs->max_gsvs_emit_size * (gs->max_gs_stream + 1);
+
+ min_esgs_ring_size = align(min_esgs_ring_size, alignment);
+ esgs_ring_size = align(esgs_ring_size, alignment);
+ gsvs_ring_size = align(gsvs_ring_size, alignment);
+
+ esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
+ gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
+
+ /* Some rings don't have to be allocated if shaders don't use them.
+ * (e.g. no varyings between ES and GS or GS and VS)
+ */
+ bool update_esgs = esgs_ring_size &&
+ (!sctx->esgs_ring ||
+ sctx->esgs_ring->width0 < esgs_ring_size);
+ bool update_gsvs = gsvs_ring_size &&
+ (!sctx->gsvs_ring ||
+ sctx->gsvs_ring->width0 < gsvs_ring_size);
- sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
- PIPE_USAGE_DEFAULT, esgs_ring_size);
- if (!sctx->esgs_ring)
- return;
+ if (!update_esgs && !update_gsvs)
+ return true;
- sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
- PIPE_USAGE_DEFAULT, gsvs_ring_size);
- if (!sctx->gsvs_ring) {
+ if (update_esgs) {
pipe_resource_reference(&sctx->esgs_ring, NULL);
- return;
+ sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_DEFAULT,
+ esgs_ring_size);
+ if (!sctx->esgs_ring)
+ return false;
}
- si_init_config_add_vgt_flush(sctx);
+ if (update_gsvs) {
+ pipe_resource_reference(&sctx->gsvs_ring, NULL);
+ sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_DEFAULT,
+ gsvs_ring_size);
+ if (!sctx->gsvs_ring)
+ return false;
+ }
+
+ /* Create the "init_config_gs_rings" state. */
+ pm4 = CALLOC_STRUCT(si_pm4_state);
+ if (!pm4)
+ return false;
- /* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
- if (sctx->b.chip_class >= VI) {
- /* The maximum sizes are 63.999 MB on VI, because
- * the register fields only have 18 bits. */
- assert(esgs_ring_size / 256 < (1 << 18));
- assert(gsvs_ring_size / 256 < (1 << 18));
- }
- si_pm4_set_reg(sctx->init_config, R_030900_VGT_ESGS_RING_SIZE,
- esgs_ring_size / 256);
- si_pm4_set_reg(sctx->init_config, R_030904_VGT_GSVS_RING_SIZE,
- gsvs_ring_size / 256);
+ if (sctx->esgs_ring)
+ si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE,
+ sctx->esgs_ring->width0 / 256);
+ if (sctx->gsvs_ring)
+ si_pm4_set_reg(pm4, R_030904_VGT_GSVS_RING_SIZE,
+ sctx->gsvs_ring->width0 / 256);
} else {
- si_pm4_set_reg(sctx->init_config, R_0088C8_VGT_ESGS_RING_SIZE,
- esgs_ring_size / 256);
- si_pm4_set_reg(sctx->init_config, R_0088CC_VGT_GSVS_RING_SIZE,
- gsvs_ring_size / 256);
+ if (sctx->esgs_ring)
+ si_pm4_set_reg(pm4, R_0088C8_VGT_ESGS_RING_SIZE,
+ sctx->esgs_ring->width0 / 256);
+ if (sctx->gsvs_ring)
+ si_pm4_set_reg(pm4, R_0088CC_VGT_GSVS_RING_SIZE,
+ sctx->gsvs_ring->width0 / 256);
}
- /* Flush the context to re-emit the init_config state.
- * This is done only once in a lifetime of a context.
- */
- si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
+ /* Set the state. */
+ if (sctx->init_config_gs_rings)
+ si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0);
+ sctx->init_config_gs_rings = pm4;
+
+ if (!sctx->init_config_has_vgt_flush) {
+ si_init_config_add_vgt_flush(sctx);
+ si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
+ }
+
+ /* Flush the context to re-emit both init_config states. */
sctx->b.initial_gfx_cs_size = 0; /* force flush */
si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
- si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
- sctx->esgs_ring, 0, esgs_ring_size,
- true, true, 4, 64, 0);
- si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
- sctx->esgs_ring, 0, esgs_ring_size,
- false, false, 0, 0, 0);
- si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
- sctx->gsvs_ring, 0, gsvs_ring_size,
- false, false, 0, 0, 0);
+ /* Set ring bindings. */
+ if (sctx->esgs_ring) {
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
+ sctx->esgs_ring, 0, sctx->esgs_ring->width0,
+ true, true, 4, 64, 0);
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
+ sctx->esgs_ring, 0, sctx->esgs_ring->width0,
+ false, false, 0, 0, 0);
+ }
+ if (sctx->gsvs_ring)
+ si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
+ sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
+ false, false, 0, 0, 0);
+ return true;
}
-static void si_update_gs_rings(struct si_context *sctx)
+static void si_update_gsvs_ring_bindings(struct si_context *sctx)
{
- unsigned gsvs_itemsize = sctx->gs_shader.cso->gsvs_itemsize;
+ unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size;
uint64_t offset;
- if (gsvs_itemsize == sctx->last_gsvs_itemsize)
+ if (!sctx->gsvs_ring || gsvs_itemsize == sctx->last_gsvs_itemsize)
return;
sctx->last_gsvs_itemsize = gsvs_itemsize;
@@ -1508,13 +1547,10 @@ bool si_update_shaders(struct si_context *sctx)
si_pm4_bind_state(sctx, vs, sctx->gs_shader.current->gs_copy_shader->pm4);
si_update_so(sctx, sctx->gs_shader.cso);
- if (!sctx->gsvs_ring) {
- si_init_gs_rings(sctx);
- if (!sctx->gsvs_ring)
- return false;
- }
+ if (!si_update_gs_ring_buffers(sctx))
+ return false;
- si_update_gs_rings(sctx);
+ si_update_gsvs_ring_bindings(sctx);
} else {
si_pm4_bind_state(sctx, gs, NULL);
si_pm4_bind_state(sctx, es, NULL);
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 4bb24572b90..0c48340beef 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -3608,6 +3608,9 @@
#define S_00B854_WAVES_PER_SH(x) (((x) & 0x3F) << 0) /* mask is 0x3FF on CIK */
#define G_00B854_WAVES_PER_SH(x) (((x) >> 0) & 0x3F) /* mask is 0x3FF on CIK */
#define C_00B854_WAVES_PER_SH 0xFFFFFFC0 /* mask is 0x3FF on CIK */
+#define S_00B854_WAVES_PER_SH_CIK(x) (((x) & 0x3FF) << 0)
+#define G_00B854_WAVES_PER_SH_CIK(x) (((x) >> 0) & 0x3FF)
+#define C_00B854_WAVES_PER_SH_CIK 0xFFFFFC00
#define S_00B854_TG_PER_CU(x) (((x) & 0x0F) << 12)
#define G_00B854_TG_PER_CU(x) (((x) >> 12) & 0x0F)
#define C_00B854_TG_PER_CU 0xFFFF0FFF
@@ -5211,6 +5214,296 @@
#define V_028714_SPI_SHADER_UINT16_ABGR 0x07
#define V_028714_SPI_SHADER_SINT16_ABGR 0x08
#define V_028714_SPI_SHADER_32_ABGR 0x09
+/* Stoney */
+#define R_028754_SX_PS_DOWNCONVERT 0x028754
+#define S_028754_MRT0(x) (((x) & 0x0F) << 0)
+#define G_028754_MRT0(x) (((x) >> 0) & 0x0F)
+#define C_028754_MRT0 0xFFFFFFF0
+#define V_028754_SX_RT_EXPORT_NO_CONVERSION 0
+#define V_028754_SX_RT_EXPORT_32_R 1
+#define V_028754_SX_RT_EXPORT_32_A 2
+#define V_028754_SX_RT_EXPORT_10_11_11 3
+#define V_028754_SX_RT_EXPORT_2_10_10_10 4
+#define V_028754_SX_RT_EXPORT_8_8_8_8 5
+#define V_028754_SX_RT_EXPORT_5_6_5 6
+#define V_028754_SX_RT_EXPORT_1_5_5_5 7
+#define V_028754_SX_RT_EXPORT_4_4_4_4 8
+#define V_028754_SX_RT_EXPORT_16_16_GR 9
+#define V_028754_SX_RT_EXPORT_16_16_AR 10
+#define S_028754_MRT1(x) (((x) & 0x0F) << 4)
+#define G_028754_MRT1(x) (((x) >> 4) & 0x0F)
+#define C_028754_MRT1 0xFFFFFF0F
+#define S_028754_MRT2(x) (((x) & 0x0F) << 8)
+#define G_028754_MRT2(x) (((x) >> 8) & 0x0F)
+#define C_028754_MRT2 0xFFFFF0FF
+#define S_028754_MRT3(x) (((x) & 0x0F) << 12)
+#define G_028754_MRT3(x) (((x) >> 12) & 0x0F)
+#define C_028754_MRT3 0xFFFF0FFF
+#define S_028754_MRT4(x) (((x) & 0x0F) << 16)
+#define G_028754_MRT4(x) (((x) >> 16) & 0x0F)
+#define C_028754_MRT4 0xFFF0FFFF
+#define S_028754_MRT5(x) (((x) & 0x0F) << 20)
+#define G_028754_MRT5(x) (((x) >> 20) & 0x0F)
+#define C_028754_MRT5 0xFF0FFFFF
+#define S_028754_MRT6(x) (((x) & 0x0F) << 24)
+#define G_028754_MRT6(x) (((x) >> 24) & 0x0F)
+#define C_028754_MRT6 0xF0FFFFFF
+#define S_028754_MRT7(x) (((x) & 0x0F) << 28)
+#define G_028754_MRT7(x) (((x) >> 28) & 0x0F)
+#define C_028754_MRT7 0x0FFFFFFF
+#define R_028758_SX_BLEND_OPT_EPSILON 0x028758
+#define S_028758_MRT0_EPSILON(x) (((x) & 0x0F) << 0)
+#define G_028758_MRT0_EPSILON(x) (((x) >> 0) & 0x0F)
+#define C_028758_MRT0_EPSILON 0xFFFFFFF0
+#define V_028758_EXACT 0
+#define V_028758_11BIT_FORMAT 1
+#define V_028758_10BIT_FORMAT 3
+#define V_028758_8BIT_FORMAT 7
+#define V_028758_6BIT_FORMAT 11
+#define V_028758_5BIT_FORMAT 13
+#define V_028758_4BIT_FORMAT 15
+#define S_028758_MRT1_EPSILON(x) (((x) & 0x0F) << 4)
+#define G_028758_MRT1_EPSILON(x) (((x) >> 4) & 0x0F)
+#define C_028758_MRT1_EPSILON 0xFFFFFF0F
+#define S_028758_MRT2_EPSILON(x) (((x) & 0x0F) << 8)
+#define G_028758_MRT2_EPSILON(x) (((x) >> 8) & 0x0F)
+#define C_028758_MRT2_EPSILON 0xFFFFF0FF
+#define S_028758_MRT3_EPSILON(x) (((x) & 0x0F) << 12)
+#define G_028758_MRT3_EPSILON(x) (((x) >> 12) & 0x0F)
+#define C_028758_MRT3_EPSILON 0xFFFF0FFF
+#define S_028758_MRT4_EPSILON(x) (((x) & 0x0F) << 16)
+#define G_028758_MRT4_EPSILON(x) (((x) >> 16) & 0x0F)
+#define C_028758_MRT4_EPSILON 0xFFF0FFFF
+#define S_028758_MRT5_EPSILON(x) (((x) & 0x0F) << 20)
+#define G_028758_MRT5_EPSILON(x) (((x) >> 20) & 0x0F)
+#define C_028758_MRT5_EPSILON 0xFF0FFFFF
+#define S_028758_MRT6_EPSILON(x) (((x) & 0x0F) << 24)
+#define G_028758_MRT6_EPSILON(x) (((x) >> 24) & 0x0F)
+#define C_028758_MRT6_EPSILON 0xF0FFFFFF
+#define S_028758_MRT7_EPSILON(x) (((x) & 0x0F) << 28)
+#define G_028758_MRT7_EPSILON(x) (((x) >> 28) & 0x0F)
+#define C_028758_MRT7_EPSILON 0x0FFFFFFF
+#define R_02875C_SX_BLEND_OPT_CONTROL 0x02875C
+#define S_02875C_MRT0_COLOR_OPT_DISABLE(x) (((x) & 0x1) << 0)
+#define G_02875C_MRT0_COLOR_OPT_DISABLE(x) (((x) >> 0) & 0x1)
+#define C_02875C_MRT0_COLOR_OPT_DISABLE 0xFFFFFFFE
+#define S_02875C_MRT0_ALPHA_OPT_DISABLE(x) (((x) & 0x1) << 1)
+#define G_02875C_MRT0_ALPHA_OPT_DISABLE(x) (((x) >> 1) & 0x1)
+#define C_02875C_MRT0_ALPHA_OPT_DISABLE 0xFFFFFFFD
+#define S_02875C_MRT1_COLOR_OPT_DISABLE(x) (((x) & 0x1) << 4)
+#define G_02875C_MRT1_COLOR_OPT_DISABLE(x) (((x) >> 4) & 0x1)
+#define C_02875C_MRT1_COLOR_OPT_DISABLE 0xFFFFFFEF
+#define S_02875C_MRT1_ALPHA_OPT_DISABLE(x) (((x) & 0x1) << 5)
+#define G_02875C_MRT1_ALPHA_OPT_DISABLE(x) (((x) >> 5) & 0x1)
+#define C_02875C_MRT1_ALPHA_OPT_DISABLE 0xFFFFFFDF
+#define S_02875C_MRT2_COLOR_OPT_DISABLE(x) (((x) & 0x1) << 8)
+#define G_02875C_MRT2_COLOR_OPT_DISABLE(x) (((x) >> 8) & 0x1)
+#define C_02875C_MRT2_COLOR_OPT_DISABLE 0xFFFFFEFF
+#define S_02875C_MRT2_ALPHA_OPT_DISABLE(x) (((x) & 0x1) << 9)
+#define G_02875C_MRT2_ALPHA_OPT_DISABLE(x) (((x) >> 9) & 0x1)
+#define C_02875C_MRT2_ALPHA_OPT_DISABLE 0xFFFFFDFF
+#define S_02875C_MRT3_COLOR_OPT_DISABLE(x) (((x) & 0x1) << 12)
+#define G_02875C_MRT3_COLOR_OPT_DISABLE(x) (((x) >> 12) & 0x1)
+#define C_02875C_MRT3_COLOR_OPT_DISABLE 0xFFFFEFFF
+#define S_02875C_MRT3_ALPHA_OPT_DISABLE(x) (((x) & 0x1) << 13)
+#define G_02875C_MRT3_ALPHA_OPT_DISABLE(x) (((x) >> 13) & 0x1)
+#define C_02875C_MRT3_ALPHA_OPT_DISABLE 0xFFFFDFFF
+#define S_02875C_MRT4_COLOR_OPT_DISABLE(x) (((x) & 0x1) << 16)
+#define G_02875C_MRT4_COLOR_OPT_DISABLE(x) (((x) >> 16) & 0x1)
+#define C_02875C_MRT4_COLOR_OPT_DISABLE 0xFFFEFFFF
+#define S_02875C_MRT4_ALPHA_OPT_DISABLE(x) (((x) & 0x1) << 17)
+#define G_02875C_MRT4_ALPHA_OPT_DISABLE(x) (((x) >> 17) & 0x1)
+#define C_02875C_MRT4_ALPHA_OPT_DISABLE 0xFFFDFFFF
+#define S_02875C_MRT5_COLOR_OPT_DISABLE(x) (((x) & 0x1) << 20)
+#define G_02875C_MRT5_COLOR_OPT_DISABLE(x) (((x) >> 20) & 0x1)
+#define C_02875C_MRT5_COLOR_OPT_DISABLE 0xFFEFFFFF
+#define S_02875C_MRT5_ALPHA_OPT_DISABLE(x) (((x) & 0x1) << 21)
+#define G_02875C_MRT5_ALPHA_OPT_DISABLE(x) (((x) >> 21) & 0x1)
+#define C_02875C_MRT5_ALPHA_OPT_DISABLE 0xFFDFFFFF
+#define S_02875C_MRT6_COLOR_OPT_DISABLE(x) (((x) & 0x1) << 24)
+#define G_02875C_MRT6_COLOR_OPT_DISABLE(x) (((x) >> 24) & 0x1)
+#define C_02875C_MRT6_COLOR_OPT_DISABLE 0xFEFFFFFF
+#define S_02875C_MRT6_ALPHA_OPT_DISABLE(x) (((x) & 0x1) << 25)
+#define G_02875C_MRT6_ALPHA_OPT_DISABLE(x) (((x) >> 25) & 0x1)
+#define C_02875C_MRT6_ALPHA_OPT_DISABLE 0xFDFFFFFF
+#define S_02875C_MRT7_COLOR_OPT_DISABLE(x) (((x) & 0x1) << 28)
+#define G_02875C_MRT7_COLOR_OPT_DISABLE(x) (((x) >> 28) & 0x1)
+#define C_02875C_MRT7_COLOR_OPT_DISABLE 0xEFFFFFFF
+#define S_02875C_MRT7_ALPHA_OPT_DISABLE(x) (((x) & 0x1) << 29)
+#define G_02875C_MRT7_ALPHA_OPT_DISABLE(x) (((x) >> 29) & 0x1)
+#define C_02875C_MRT7_ALPHA_OPT_DISABLE 0xDFFFFFFF
+#define S_02875C_PIXEN_ZERO_OPT_DISABLE(x) (((x) & 0x1) << 31)
+#define G_02875C_PIXEN_ZERO_OPT_DISABLE(x) (((x) >> 31) & 0x1)
+#define C_02875C_PIXEN_ZERO_OPT_DISABLE 0x7FFFFFFF
+#define R_028760_SX_MRT0_BLEND_OPT 0x028760
+#define S_028760_COLOR_SRC_OPT(x) (((x) & 0x07) << 0)
+#define G_028760_COLOR_SRC_OPT(x) (((x) >> 0) & 0x07)
+#define C_028760_COLOR_SRC_OPT 0xFFFFFFF8
+#define V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL 0
+#define V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 1
+#define V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0 2
+#define V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1 3
+#define V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 4
+#define V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 5
+#define V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0 6
+#define V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE 7
+#define S_028760_COLOR_DST_OPT(x) (((x) & 0x07) << 4)
+#define G_028760_COLOR_DST_OPT(x) (((x) >> 4) & 0x07)
+#define C_028760_COLOR_DST_OPT 0xFFFFFF8F
+#define S_028760_COLOR_COMB_FCN(x) (((x) & 0x07) << 8)
+#define G_028760_COLOR_COMB_FCN(x) (((x) >> 8) & 0x07)
+#define C_028760_COLOR_COMB_FCN 0xFFFFF8FF
+#define V_028760_OPT_COMB_NONE 0
+#define V_028760_OPT_COMB_ADD 1
+#define V_028760_OPT_COMB_SUBTRACT 2
+#define V_028760_OPT_COMB_MIN 3
+#define V_028760_OPT_COMB_MAX 4
+#define V_028760_OPT_COMB_REVSUBTRACT 5
+#define V_028760_OPT_COMB_BLEND_DISABLED 6
+#define V_028760_OPT_COMB_SAFE_ADD 7
+#define S_028760_ALPHA_SRC_OPT(x) (((x) & 0x07) << 16)
+#define G_028760_ALPHA_SRC_OPT(x) (((x) >> 16) & 0x07)
+#define C_028760_ALPHA_SRC_OPT 0xFFF8FFFF
+#define S_028760_ALPHA_DST_OPT(x) (((x) & 0x07) << 20)
+#define G_028760_ALPHA_DST_OPT(x) (((x) >> 20) & 0x07)
+#define C_028760_ALPHA_DST_OPT 0xFF8FFFFF
+#define S_028760_ALPHA_COMB_FCN(x) (((x) & 0x07) << 24)
+#define G_028760_ALPHA_COMB_FCN(x) (((x) >> 24) & 0x07)
+#define C_028760_ALPHA_COMB_FCN 0xF8FFFFFF
+#define R_028764_SX_MRT1_BLEND_OPT 0x028764
+#define S_028764_COLOR_SRC_OPT(x) (((x) & 0x07) << 0)
+#define G_028764_COLOR_SRC_OPT(x) (((x) >> 0) & 0x07)
+#define C_028764_COLOR_SRC_OPT 0xFFFFFFF8
+#define S_028764_COLOR_DST_OPT(x) (((x) & 0x07) << 4)
+#define G_028764_COLOR_DST_OPT(x) (((x) >> 4) & 0x07)
+#define C_028764_COLOR_DST_OPT 0xFFFFFF8F
+#define S_028764_COLOR_COMB_FCN(x) (((x) & 0x07) << 8)
+#define G_028764_COLOR_COMB_FCN(x) (((x) >> 8) & 0x07)
+#define C_028764_COLOR_COMB_FCN 0xFFFFF8FF
+#define S_028764_ALPHA_SRC_OPT(x) (((x) & 0x07) << 16)
+#define G_028764_ALPHA_SRC_OPT(x) (((x) >> 16) & 0x07)
+#define C_028764_ALPHA_SRC_OPT 0xFFF8FFFF
+#define S_028764_ALPHA_DST_OPT(x) (((x) & 0x07) << 20)
+#define G_028764_ALPHA_DST_OPT(x) (((x) >> 20) & 0x07)
+#define C_028764_ALPHA_DST_OPT 0xFF8FFFFF
+#define S_028764_ALPHA_COMB_FCN(x) (((x) & 0x07) << 24)
+#define G_028764_ALPHA_COMB_FCN(x) (((x) >> 24) & 0x07)
+#define C_028764_ALPHA_COMB_FCN 0xF8FFFFFF
+#define R_028768_SX_MRT2_BLEND_OPT 0x028768
+#define S_028768_COLOR_SRC_OPT(x) (((x) & 0x07) << 0)
+#define G_028768_COLOR_SRC_OPT(x) (((x) >> 0) & 0x07)
+#define C_028768_COLOR_SRC_OPT 0xFFFFFFF8
+#define S_028768_COLOR_DST_OPT(x) (((x) & 0x07) << 4)
+#define G_028768_COLOR_DST_OPT(x) (((x) >> 4) & 0x07)
+#define C_028768_COLOR_DST_OPT 0xFFFFFF8F
+#define S_028768_COLOR_COMB_FCN(x) (((x) & 0x07) << 8)
+#define G_028768_COLOR_COMB_FCN(x) (((x) >> 8) & 0x07)
+#define C_028768_COLOR_COMB_FCN 0xFFFFF8FF
+#define S_028768_ALPHA_SRC_OPT(x) (((x) & 0x07) << 16)
+#define G_028768_ALPHA_SRC_OPT(x) (((x) >> 16) & 0x07)
+#define C_028768_ALPHA_SRC_OPT 0xFFF8FFFF
+#define S_028768_ALPHA_DST_OPT(x) (((x) & 0x07) << 20)
+#define G_028768_ALPHA_DST_OPT(x) (((x) >> 20) & 0x07)
+#define C_028768_ALPHA_DST_OPT 0xFF8FFFFF
+#define S_028768_ALPHA_COMB_FCN(x) (((x) & 0x07) << 24)
+#define G_028768_ALPHA_COMB_FCN(x) (((x) >> 24) & 0x07)
+#define C_028768_ALPHA_COMB_FCN 0xF8FFFFFF
+#define R_02876C_SX_MRT3_BLEND_OPT 0x02876C
+#define S_02876C_COLOR_SRC_OPT(x) (((x) & 0x07) << 0)
+#define G_02876C_COLOR_SRC_OPT(x) (((x) >> 0) & 0x07)
+#define C_02876C_COLOR_SRC_OPT 0xFFFFFFF8
+#define S_02876C_COLOR_DST_OPT(x) (((x) & 0x07) << 4)
+#define G_02876C_COLOR_DST_OPT(x) (((x) >> 4) & 0x07)
+#define C_02876C_COLOR_DST_OPT 0xFFFFFF8F
+#define S_02876C_COLOR_COMB_FCN(x) (((x) & 0x07) << 8)
+#define G_02876C_COLOR_COMB_FCN(x) (((x) >> 8) & 0x07)
+#define C_02876C_COLOR_COMB_FCN 0xFFFFF8FF
+#define S_02876C_ALPHA_SRC_OPT(x) (((x) & 0x07) << 16)
+#define G_02876C_ALPHA_SRC_OPT(x) (((x) >> 16) & 0x07)
+#define C_02876C_ALPHA_SRC_OPT 0xFFF8FFFF
+#define S_02876C_ALPHA_DST_OPT(x) (((x) & 0x07) << 20)
+#define G_02876C_ALPHA_DST_OPT(x) (((x) >> 20) & 0x07)
+#define C_02876C_ALPHA_DST_OPT 0xFF8FFFFF
+#define S_02876C_ALPHA_COMB_FCN(x) (((x) & 0x07) << 24)
+#define G_02876C_ALPHA_COMB_FCN(x) (((x) >> 24) & 0x07)
+#define C_02876C_ALPHA_COMB_FCN 0xF8FFFFFF
+#define R_028770_SX_MRT4_BLEND_OPT 0x028770
+#define S_028770_COLOR_SRC_OPT(x) (((x) & 0x07) << 0)
+#define G_028770_COLOR_SRC_OPT(x) (((x) >> 0) & 0x07)
+#define C_028770_COLOR_SRC_OPT 0xFFFFFFF8
+#define S_028770_COLOR_DST_OPT(x) (((x) & 0x07) << 4)
+#define G_028770_COLOR_DST_OPT(x) (((x) >> 4) & 0x07)
+#define C_028770_COLOR_DST_OPT 0xFFFFFF8F
+#define S_028770_COLOR_COMB_FCN(x) (((x) & 0x07) << 8)
+#define G_028770_COLOR_COMB_FCN(x) (((x) >> 8) & 0x07)
+#define C_028770_COLOR_COMB_FCN 0xFFFFF8FF
+#define S_028770_ALPHA_SRC_OPT(x) (((x) & 0x07) << 16)
+#define G_028770_ALPHA_SRC_OPT(x) (((x) >> 16) & 0x07)
+#define C_028770_ALPHA_SRC_OPT 0xFFF8FFFF
+#define S_028770_ALPHA_DST_OPT(x) (((x) & 0x07) << 20)
+#define G_028770_ALPHA_DST_OPT(x) (((x) >> 20) & 0x07)
+#define C_028770_ALPHA_DST_OPT 0xFF8FFFFF
+#define S_028770_ALPHA_COMB_FCN(x) (((x) & 0x07) << 24)
+#define G_028770_ALPHA_COMB_FCN(x) (((x) >> 24) & 0x07)
+#define C_028770_ALPHA_COMB_FCN 0xF8FFFFFF
+#define R_028774_SX_MRT5_BLEND_OPT 0x028774
+#define S_028774_COLOR_SRC_OPT(x) (((x) & 0x07) << 0)
+#define G_028774_COLOR_SRC_OPT(x) (((x) >> 0) & 0x07)
+#define C_028774_COLOR_SRC_OPT 0xFFFFFFF8
+#define S_028774_COLOR_DST_OPT(x) (((x) & 0x07) << 4)
+#define G_028774_COLOR_DST_OPT(x) (((x) >> 4) & 0x07)
+#define C_028774_COLOR_DST_OPT 0xFFFFFF8F
+#define S_028774_COLOR_COMB_FCN(x) (((x) & 0x07) << 8)
+#define G_028774_COLOR_COMB_FCN(x) (((x) >> 8) & 0x07)
+#define C_028774_COLOR_COMB_FCN 0xFFFFF8FF
+#define S_028774_ALPHA_SRC_OPT(x) (((x) & 0x07) << 16)
+#define G_028774_ALPHA_SRC_OPT(x) (((x) >> 16) & 0x07)
+#define C_028774_ALPHA_SRC_OPT 0xFFF8FFFF
+#define S_028774_ALPHA_DST_OPT(x) (((x) & 0x07) << 20)
+#define G_028774_ALPHA_DST_OPT(x) (((x) >> 20) & 0x07)
+#define C_028774_ALPHA_DST_OPT 0xFF8FFFFF
+#define S_028774_ALPHA_COMB_FCN(x) (((x) & 0x07) << 24)
+#define G_028774_ALPHA_COMB_FCN(x) (((x) >> 24) & 0x07)
+#define C_028774_ALPHA_COMB_FCN 0xF8FFFFFF
+#define R_028778_SX_MRT6_BLEND_OPT 0x028778
+#define S_028778_COLOR_SRC_OPT(x) (((x) & 0x07) << 0)
+#define G_028778_COLOR_SRC_OPT(x) (((x) >> 0) & 0x07)
+#define C_028778_COLOR_SRC_OPT 0xFFFFFFF8
+#define S_028778_COLOR_DST_OPT(x) (((x) & 0x07) << 4)
+#define G_028778_COLOR_DST_OPT(x) (((x) >> 4) & 0x07)
+#define C_028778_COLOR_DST_OPT 0xFFFFFF8F
+#define S_028778_COLOR_COMB_FCN(x) (((x) & 0x07) << 8)
+#define G_028778_COLOR_COMB_FCN(x) (((x) >> 8) & 0x07)
+#define C_028778_COLOR_COMB_FCN 0xFFFFF8FF
+#define S_028778_ALPHA_SRC_OPT(x) (((x) & 0x07) << 16)
+#define G_028778_ALPHA_SRC_OPT(x) (((x) >> 16) & 0x07)
+#define C_028778_ALPHA_SRC_OPT 0xFFF8FFFF
+#define S_028778_ALPHA_DST_OPT(x) (((x) & 0x07) << 20)
+#define G_028778_ALPHA_DST_OPT(x) (((x) >> 20) & 0x07)
+#define C_028778_ALPHA_DST_OPT 0xFF8FFFFF
+#define S_028778_ALPHA_COMB_FCN(x) (((x) & 0x07) << 24)
+#define G_028778_ALPHA_COMB_FCN(x) (((x) >> 24) & 0x07)
+#define C_028778_ALPHA_COMB_FCN 0xF8FFFFFF
+#define R_02877C_SX_MRT7_BLEND_OPT 0x02877C
+#define S_02877C_COLOR_SRC_OPT(x) (((x) & 0x07) << 0)
+#define G_02877C_COLOR_SRC_OPT(x) (((x) >> 0) & 0x07)
+#define C_02877C_COLOR_SRC_OPT 0xFFFFFFF8
+#define S_02877C_COLOR_DST_OPT(x) (((x) & 0x07) << 4)
+#define G_02877C_COLOR_DST_OPT(x) (((x) >> 4) & 0x07)
+#define C_02877C_COLOR_DST_OPT 0xFFFFFF8F
+#define S_02877C_COLOR_COMB_FCN(x) (((x) & 0x07) << 8)
+#define G_02877C_COLOR_COMB_FCN(x) (((x) >> 8) & 0x07)
+#define C_02877C_COLOR_COMB_FCN 0xFFFFF8FF
+#define S_02877C_ALPHA_SRC_OPT(x) (((x) & 0x07) << 16)
+#define G_02877C_ALPHA_SRC_OPT(x) (((x) >> 16) & 0x07)
+#define C_02877C_ALPHA_SRC_OPT 0xFFF8FFFF
+#define S_02877C_ALPHA_DST_OPT(x) (((x) & 0x07) << 20)
+#define G_02877C_ALPHA_DST_OPT(x) (((x) >> 20) & 0x07)
+#define C_02877C_ALPHA_DST_OPT 0xFF8FFFFF
+#define S_02877C_ALPHA_COMB_FCN(x) (((x) & 0x07) << 24)
+#define G_02877C_ALPHA_COMB_FCN(x) (((x) >> 24) & 0x07)
+#define C_02877C_ALPHA_COMB_FCN 0xF8FFFFFF
+/* */
#define R_028780_CB_BLEND0_CONTROL 0x028780
#define S_028780_COLOR_SRCBLEND(x) (((x) & 0x1F) << 0)
#define G_028780_COLOR_SRCBLEND(x) (((x) >> 0) & 0x1F)
@@ -5473,6 +5766,7 @@
#define V_028808_CB_ELIMINATE_FAST_CLEAR 0x02
#define V_028808_CB_RESOLVE 0x03
#define V_028808_CB_FMASK_DECOMPRESS 0x05
+#define V_028808_CB_DCC_DECOMPRESS 0x06
#define S_028808_ROP3(x) (((x) & 0xFF) << 16)
#define G_028808_ROP3(x) (((x) >> 16) & 0xFF)
#define C_028808_ROP3 0xFF00FFFF
@@ -5551,6 +5845,11 @@
#define V_02880C_EXPORT_GREATER_THAN_Z 2
#define V_02880C_EXPORT_RESERVED 3
/* */
+/* Stoney */
+#define S_02880C_DUAL_QUAD_DISABLE(x) (((x) & 0x1) << 15)
+#define G_02880C_DUAL_QUAD_DISABLE(x) (((x) >> 15) & 0x1)
+#define C_02880C_DUAL_QUAD_DISABLE 0xFFFF7FFF
+/* */
#define R_028810_PA_CL_CLIP_CNTL 0x028810
#define S_028810_UCP_ENA_0(x) (((x) & 0x1) << 0)
#define G_028810_UCP_ENA_0(x) (((x) >> 0) & 0x1)
@@ -6132,6 +6431,9 @@
#define V_028A40_GS_SCENARIO_G 0x03
#define V_028A40_GS_SCENARIO_C 0x04
#define V_028A40_SPRITE_EN 0x05
+#define S_028A40_RESERVED_0(x) (((x) & 0x1) << 3)
+#define G_028A40_RESERVED_0(x) (((x) >> 3) & 0x1)
+#define C_028A40_RESERVED_0 0xFFFFFFF7
#define S_028A40_CUT_MODE(x) (((x) & 0x03) << 4)
#define G_028A40_CUT_MODE(x) (((x) >> 4) & 0x03)
#define C_028A40_CUT_MODE 0xFFFFFFCF
@@ -6139,12 +6441,19 @@
#define V_028A40_GS_CUT_512 0x01
#define V_028A40_GS_CUT_256 0x02
#define V_028A40_GS_CUT_128 0x03
+#define S_028A40_RESERVED_1(x) (((x) & 0x1F) << 6)
+#define G_028A40_RESERVED_1(x) (((x) >> 6) & 0x1F)
+#define C_028A40_RESERVED_1 0xFFFFF83F
#define S_028A40_GS_C_PACK_EN(x) (((x) & 0x1) << 11)
#define G_028A40_GS_C_PACK_EN(x) (((x) >> 11) & 0x1)
#define C_028A40_GS_C_PACK_EN 0xFFFFF7FF
+#define S_028A40_RESERVED_2(x) (((x) & 0x1) << 12)
+#define G_028A40_RESERVED_2(x) (((x) >> 12) & 0x1)
+#define C_028A40_RESERVED_2 0xFFFFEFFF
#define S_028A40_ES_PASSTHRU(x) (((x) & 0x1) << 13)
#define G_028A40_ES_PASSTHRU(x) (((x) >> 13) & 0x1)
#define C_028A40_ES_PASSTHRU 0xFFFFDFFF
+/* SI-CIK */
#define S_028A40_COMPUTE_MODE(x) (((x) & 0x1) << 14)
#define G_028A40_COMPUTE_MODE(x) (((x) >> 14) & 0x1)
#define C_028A40_COMPUTE_MODE 0xFFFFBFFF
@@ -6154,6 +6463,7 @@
#define S_028A40_ELEMENT_INFO_EN(x) (((x) & 0x1) << 16)
#define G_028A40_ELEMENT_INFO_EN(x) (((x) >> 16) & 0x1)
#define C_028A40_ELEMENT_INFO_EN 0xFFFEFFFF
+/* */
#define S_028A40_PARTIAL_THD_AT_EOI(x) (((x) & 0x1) << 17)
#define G_028A40_PARTIAL_THD_AT_EOI(x) (((x) >> 17) & 0x1)
#define C_028A40_PARTIAL_THD_AT_EOI 0xFFFDFFFF
@@ -6339,6 +6649,9 @@
#define C_028A7C_RDREQ_POLICY 0xFFFFFF3F
#define V_028A7C_VGT_POLICY_LRU 0x00
#define V_028A7C_VGT_POLICY_STREAM 0x01
+#define S_028A7C_RDREQ_POLICY_VI(x) (((x) & 0x1) << 6)
+#define G_028A7C_RDREQ_POLICY_VI(x) (((x) >> 6) & 0x1)
+#define C_028A7C_RDREQ_POLICY_VI 0xFFFFFFBF
#define S_028A7C_ATC(x) (((x) & 0x1) << 8)
#define G_028A7C_ATC(x) (((x) >> 8) & 0x1)
#define C_028A7C_ATC 0xFFFFFEFF
@@ -6715,6 +7028,9 @@
#define V_028B6C_VGT_POLICY_BYPASS 0x02
/* */
/* VI */
+#define S_028B6C_RDREQ_POLICY_VI(x) (((x) & 0x1) << 15)
+#define G_028B6C_RDREQ_POLICY_VI(x) (((x) >> 15) & 0x1)
+#define C_028B6C_RDREQ_POLICY_VI 0xFFFF7FFF
#define S_028B6C_DISTRIBUTION_MODE(x) (((x) & 0x03) << 17)
#define G_028B6C_DISTRIBUTION_MODE(x) (((x) >> 17) & 0x03)
#define C_028B6C_DISTRIBUTION_MODE 0xFFF9FFFF
@@ -7317,6 +7633,12 @@
#define S_028C3C_AA_MASK_X1Y1(x) (((x) & 0xFFFF) << 16)
#define G_028C3C_AA_MASK_X1Y1(x) (((x) >> 16) & 0xFFFF)
#define C_028C3C_AA_MASK_X1Y1 0x0000FFFF
+/* Stoney */
+#define R_028C40_PA_SC_SHADER_CONTROL 0x028C40
+#define S_028C40_REALIGN_DQUADS_AFTER_N_WAVES(x) (((x) & 0x03) << 0)
+#define G_028C40_REALIGN_DQUADS_AFTER_N_WAVES(x) (((x) >> 0) & 0x03)
+#define C_028C40_REALIGN_DQUADS_AFTER_N_WAVES 0xFFFFFFFC
+/* */
#define R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL 0x028C58
#define S_028C58_VTX_REUSE_DEPTH(x) (((x) & 0xFF) << 0)
#define G_028C58_VTX_REUSE_DEPTH(x) (((x) >> 0) & 0xFF)
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index c0fc82b2f2c..bb4cef29ec9 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -250,6 +250,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
index caf4b17de16..acb2e95e747 100644
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -204,7 +204,8 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
unsigned prim, unsigned start, unsigned count,
unsigned start_instance, unsigned instance_count)
{
- unsigned gen_prim, gen_size, gen_nr, gen_type;
+ unsigned gen_prim, gen_size, gen_nr;
+ enum indices_mode gen_type;
u_generate_func gen_func;
enum pipe_error ret = PIPE_OK;
unsigned api_pv = hwtnl->api_pv;
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c
index 9df8f6e9beb..0213409ef29 100644
--- a/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -133,7 +133,8 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
unsigned prim, unsigned start, unsigned count,
unsigned start_instance, unsigned instance_count)
{
- unsigned gen_prim, gen_size, gen_nr, gen_type;
+ unsigned gen_prim, gen_size, gen_nr;
+ enum indices_mode gen_type;
u_translate_func gen_func;
enum pipe_error ret = PIPE_OK;
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 5aa7b0d86eb..a80bc9b9119 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -383,6 +383,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
}
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index e70ee689c59..9b7ab16103f 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -2672,6 +2672,7 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
}
else if (emit->unit == PIPE_SHADER_FRAGMENT) {
if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
+ emit->key.fs.white_fragments ||
emit->key.fs.write_color0_to_n_cbufs > 1) {
/* Allocate a temp to hold the output color */
emit->fs.color_tmp_index = total_temps;
@@ -6369,8 +6370,11 @@ emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
emit_src_register(emit, &tmp_src_x);
end_emit_instruction(emit);
- /* If we don't need to broadcast the color below, emit final color here */
- if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
+ /* If we don't need to broadcast the color below or set fragments to
+ * white, emit final color here.
+ */
+ if (emit->key.fs.write_color0_to_n_cbufs <= 1 &&
+ !emit->key.fs.white_fragments) {
/* MOV output.color, tempcolor */
emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
&color_src, FALSE); /* XXX saturate? */
@@ -6381,9 +6385,27 @@ emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
/**
+ * When we need to emit white for all fragments (for emulating XOR logicop
+ * mode), this function copies white into the temporary color output register.
+ */
+static void
+emit_set_color_white(struct svga_shader_emitter_v10 *emit,
+ unsigned fs_color_tmp_index)
+{
+ struct tgsi_full_dst_register color_dst =
+ make_dst_temp_reg(fs_color_tmp_index);
+ struct tgsi_full_src_register white =
+ make_immediate_reg_float(emit, 1.0f);
+
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE);
+}
+
+
+/**
* Emit instructions for writing a single color output to multiple
* color buffers.
- * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
+ * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
+ * when key.fs.white_fragments is true).
* property is set and the number of render targets is greater than one.
* \param fs_color_tmp_index index of the temp register that holds the
* color to broadcast.
@@ -6398,7 +6420,6 @@ emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
make_src_temp_reg(fs_color_tmp_index);
assert(emit->unit == PIPE_SHADER_FRAGMENT);
- assert(n > 1);
for (i = 0; i < n; i++) {
unsigned output_reg = emit->fs.color_out_index[i];
@@ -6440,7 +6461,11 @@ emit_post_helpers(struct svga_shader_emitter_v10 *emit)
if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
emit_alpha_test_instructions(emit, fs_color_tmp_index);
}
- if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+ if (emit->key.fs.white_fragments) {
+ emit_set_color_white(emit, fs_color_tmp_index);
+ }
+ if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
+ emit->key.fs.white_fragments) {
emit_broadcast_color_instructions(emit, fs_color_tmp_index);
}
}
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index f7b41f5816d..21e3bde2ee2 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -37,14 +37,17 @@
static bool dump_stats = false;
static void
+vc4_bo_cache_free_all(struct vc4_bo_cache *cache);
+
+static void
vc4_bo_dump_stats(struct vc4_screen *screen)
{
struct vc4_bo_cache *cache = &screen->bo_cache;
fprintf(stderr, " BOs allocated: %d\n", screen->bo_count);
- fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 102);
+ fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024);
fprintf(stderr, " BOs cached: %d\n", cache->bo_count);
- fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 102);
+ fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024);
if (!list_empty(&cache->time_list)) {
struct vc4_bo *first = LIST_ENTRY(struct vc4_bo,
@@ -136,6 +139,8 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
bo->name = name;
bo->private = true;
+ bool cleared_and_retried = false;
+retry:
if (!using_vc4_simulator) {
struct drm_vc4_create_bo create;
memset(&create, 0, sizeof(create));
@@ -157,8 +162,15 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
assert(create.size >= size);
}
if (ret != 0) {
- fprintf(stderr, "create ioctl failure\n");
- abort();
+ if (!list_empty(&screen->bo_cache.time_list) &&
+ !cleared_and_retried) {
+ cleared_and_retried = true;
+ vc4_bo_cache_free_all(&screen->bo_cache);
+ goto retry;
+ }
+
+ free(bo);
+ return NULL;
}
screen->bo_count++;
@@ -248,6 +260,18 @@ free_stale_bos(struct vc4_screen *screen, time_t time)
}
}
+static void
+vc4_bo_cache_free_all(struct vc4_bo_cache *cache)
+{
+ pipe_mutex_lock(cache->lock);
+ list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
+ time_list) {
+ vc4_bo_remove_from_cache(cache, bo);
+ vc4_bo_free(bo);
+ }
+ pipe_mutex_unlock(cache->lock);
+}
+
void
vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
{
@@ -428,7 +452,7 @@ vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data, uint32_t size)
screen->bo_count++;
screen->bo_size += bo->size;
if (dump_stats) {
- fprintf(stderr, "Allocated shader %dkb:\n", size / 1024);
+ fprintf(stderr, "Allocated shader %dkb:\n", bo->size / 1024);
vc4_bo_dump_stats(screen);
}
@@ -600,11 +624,7 @@ vc4_bufmgr_destroy(struct pipe_screen *pscreen)
struct vc4_screen *screen = vc4_screen(pscreen);
struct vc4_bo_cache *cache = &screen->bo_cache;
- list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
- time_list) {
- vc4_bo_remove_from_cache(cache, bo);
- vc4_bo_free(bo);
- }
+ vc4_bo_cache_free_all(cache);
if (dump_stats) {
fprintf(stderr, "BO stats after screen destroy:\n");
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 476d2b5b0b1..a719f276b2e 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -184,6 +184,21 @@ dump_VC4_PACKET_GL_INDEXED_PRIMITIVE(void *cl, uint32_t offset, uint32_t hw_offs
}
static void
+dump_VC4_PACKET_GL_ARRAY_PRIMITIVE(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ uint8_t *b = cl + offset;
+ uint32_t *count = cl + offset + 1;
+ uint32_t *start = cl + offset + 5;
+
+ fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s\n",
+ offset, hw_offset, b[0], u_prim_name(b[0] & 0x7));
+ fprintf(stderr, "0x%08x 0x%08x: %d verts\n",
+ offset + 1, hw_offset + 1, *count);
+ fprintf(stderr, "0x%08x 0x%08x: 0x%08x start\n",
+ offset + 5, hw_offset + 5, *start);
+}
+
+static void
dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset)
{
uint32_t *bits = cl + offset;
@@ -380,7 +395,7 @@ static const struct packet_info {
PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
PACKET_DUMP(VC4_PACKET_GL_INDEXED_PRIMITIVE),
- PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE),
+ PACKET_DUMP(VC4_PACKET_GL_ARRAY_PRIMITIVE),
PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE),
PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE),
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 122bda0bac6..bb723845531 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -35,11 +35,12 @@
static bool miptree_debug = false;
-static void
+static bool
vc4_resource_bo_alloc(struct vc4_resource *rsc)
{
struct pipe_resource *prsc = &rsc->base.b;
struct pipe_screen *pscreen = prsc->screen;
+ struct vc4_bo *bo;
if (miptree_debug) {
fprintf(stderr, "alloc %p: size %d + offset %d -> %d\n",
@@ -51,12 +52,18 @@ vc4_resource_bo_alloc(struct vc4_resource *rsc)
rsc->cube_map_stride * (prsc->array_size - 1));
}
- vc4_bo_unreference(&rsc->bo);
- rsc->bo = vc4_bo_alloc(vc4_screen(pscreen),
- rsc->slices[0].offset +
- rsc->slices[0].size +
- rsc->cube_map_stride * (prsc->array_size - 1),
- "resource");
+ bo = vc4_bo_alloc(vc4_screen(pscreen),
+ rsc->slices[0].offset +
+ rsc->slices[0].size +
+ rsc->cube_map_stride * (prsc->array_size - 1),
+ "resource");
+ if (bo) {
+ vc4_bo_unreference(&rsc->bo);
+ rsc->bo = bo;
+ return true;
+ } else {
+ return false;
+ }
}
static void
@@ -101,21 +108,27 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
char *buf;
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
- vc4_resource_bo_alloc(rsc);
+ if (vc4_resource_bo_alloc(rsc)) {
- /* If it might be bound as one of our vertex buffers, make
- * sure we re-emit vertex buffer state.
- */
- if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
- vc4->dirty |= VC4_DIRTY_VTXBUF;
+ /* If it might be bound as one of our vertex buffers,
+ * make sure we re-emit vertex buffer state.
+ */
+ if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+ vc4->dirty |= VC4_DIRTY_VTXBUF;
+ } else {
+ /* If we failed to reallocate, flush everything so
+ * that we don't violate any syncing requirements.
+ */
+ vc4_flush(pctx);
+ }
} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
if (vc4_cl_references_bo(pctx, rsc->bo)) {
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
prsc->last_level == 0 &&
prsc->width0 == box->width &&
prsc->height0 == box->height &&
- prsc->depth0 == box->depth) {
- vc4_resource_bo_alloc(rsc);
+ prsc->depth0 == box->depth &&
+ vc4_resource_bo_alloc(rsc)) {
if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
vc4->dirty |= VC4_DIRTY_VTXBUF;
} else {
@@ -389,8 +402,7 @@ vc4_resource_create(struct pipe_screen *pscreen,
rsc->vc4_format = get_resource_texture_format(prsc);
vc4_setup_slices(rsc);
- vc4_resource_bo_alloc(rsc);
- if (!rsc->bo)
+ if (!vc4_resource_bo_alloc(rsc))
goto fail;
return prsc;
@@ -668,7 +680,7 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx,
uint16_t *dst = data;
struct pipe_transfer *src_transfer = NULL;
- uint32_t *src;
+ const uint32_t *src;
if (ib->user_buffer) {
src = ib->user_buffer;
} else {
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index bb867611804..88ee48c0e8f 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -184,6 +184,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 78aa344ab1d..a234ce53b20 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -420,6 +420,23 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
cso->width = framebuffer->width;
cso->height = framebuffer->height;
+ /* If we're binding to uninitialized buffers, no need to load their
+ * contents before drawing..
+ */
+ if (cso->cbufs[0]) {
+ struct vc4_resource *rsc =
+ vc4_resource(cso->cbufs[0]->texture);
+ if (!rsc->writes)
+ vc4->cleared |= PIPE_CLEAR_COLOR0;
+ }
+
+ if (cso->zsbuf) {
+ struct vc4_resource *rsc =
+ vc4_resource(cso->zsbuf->texture);
+ if (!rsc->writes)
+ vc4->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+ }
+
/* Nonzero texture mipmap levels are laid out as if they were in
* power-of-two-sized spaces. The renderbuffer config infers its
* stride from the width parameter, so we need to configure our
@@ -583,6 +600,10 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level;
prsc = vc4_resource_create(pctx->screen, &tmpl);
+ if (!prsc) {
+ free(so);
+ return NULL;
+ }
rsc = vc4_resource(prsc);
clone = vc4_resource(prsc);
clone->shadow_parent = &shadow_parent->base.b;
diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c
index cca379d47ab..26a4f7736e3 100644
--- a/src/gallium/drivers/virgl/virgl_screen.c
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -218,6 +218,7 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_CLEAR_TEXTURE:
return 0;
case PIPE_CAP_VENDOR_ID:
return 0x1af4;