summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Nicolai Hähnle <[email protected]> 2019-05-08 01:40:29 +0200
committer Marek Olšák <[email protected]> 2019-07-03 15:51:12 -0400
commit 77e715541c0423c4f962e9fcc8862d45bbc2eeb6 (patch)
tree 0da0eca35ed0db2ddf8525af17182ed417f1c76b
parent 4ecc39e1aa1568f19ebf54a99ffe14643bac7d15 (diff)
radeonsi/gfx10: emit VGT_GS_OUT_PRIM_TYPE from draw and add it to VS_STATE
With NGG, the VGT_GS_OUT_PRIM_TYPE can change without a shader change. The VS_STATE is required for both streamout and culling from a vertex shader without pre-compiling outprim-specific variants. We could consider compiling specialized variants in the future. We could also consider compiling the NGG logic as an epilog.
Acked-by: Bas Nieuwenhuizen <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/si_gfx_cs.c 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 3
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h 3
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 58
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 35
5 files changed, 52 insertions, 48 deletions
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index fb9286d6b48..bb34b07095a 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -463,7 +463,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3] = 0x00000000;
- ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_OUT_PRIM_TYPE] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE] = 0x00000000;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 39af557bcae..9dbf08fa95f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -246,6 +246,8 @@ enum {
#define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE
#define S_VS_STATE_INDEXED(x) (((unsigned)(x) & 0x1) << 1)
#define C_VS_STATE_INDEXED 0xFFFFFFFD
+#define S_VS_STATE_OUTPRIM(x) (((unsigned)(x) & 0x3) << 2)
+#define C_VS_STATE_OUTPRIM 0xFFFFFFF3
#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8)
#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF
#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24)
@@ -666,7 +668,6 @@ struct si_shader {
unsigned vgt_gsvs_ring_offset_1;
unsigned vgt_gsvs_ring_offset_2;
unsigned vgt_gsvs_ring_offset_3;
- unsigned vgt_gs_out_prim_type;
unsigned vgt_gsvs_ring_itemsize;
unsigned vgt_gs_max_vert_out;
unsigned vgt_gs_vert_itemsize;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 23c7b3245f5..678f87cd73d 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -298,10 +298,9 @@ enum si_tracked_reg {
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
- SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 4 consecutive registers */
+ SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 3 consecutive registers */
SI_TRACKED_VGT_GSVS_RING_OFFSET_2,
SI_TRACKED_VGT_GSVS_RING_OFFSET_3,
- SI_TRACKED_VGT_GS_OUT_PRIM_TYPE,
SI_TRACKED_VGT_GSVS_RING_ITEMSIZE,
SI_TRACKED_VGT_GS_MAX_VERT_OUT,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 998c21d001e..3d2a4d72891 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -549,6 +549,30 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
return ia_multi_vgt_param;
}
+static unsigned si_conv_prim_to_gs_out(unsigned mode) /* Translate a PIPE_PRIM_* primitive type into the matching V_028A6C_OUTPRIM_TYPE_* value. */
+{
+ static const int prim_conv[] = { /* lookup table indexed by PIPE_PRIM_* value */
+ [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
+ [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, /* every line-based type maps to LINESTRIP */
+ [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, /* triangles, quads and polygons all map to TRISTRIP */
+ [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, /* adjacency variants follow their base primitive */
+ [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST, /* patches share the POINTLIST entry with points */
+ };
+ assert(mode < ARRAY_SIZE(prim_conv)); /* mode must be a valid index into the table */
+
+ return prim_conv[mode]; /* table entry for the requested primitive type */
+}
+
/* rast_prim is the primitive type after GS. */
static void si_emit_rasterizer_prim_state(struct si_context *sctx)
{
@@ -556,24 +580,34 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
enum pipe_prim_type rast_prim = sctx->current_rast_prim;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
- /* Skip this if not rendering lines. */
- if (!util_prim_is_lines(rast_prim))
+ if (likely(rast_prim == sctx->last_rast_prim &&
+ rs->pa_sc_line_stipple == sctx->last_sc_line_stipple))
return;
- if (rast_prim == sctx->last_rast_prim &&
- rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
- return;
+ if (util_prim_is_lines(rast_prim)) {
+ /* For lines, reset the stipple pattern at each primitive. Otherwise,
+ * reset the stipple pattern at each packet (line strips, line loops).
+ */
+ radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
+ rs->pa_sc_line_stipple |
+ S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
+ sctx->context_roll = true;
+ }
- /* For lines, reset the stipple pattern at each primitive. Otherwise,
- * reset the stipple pattern at each packet (line strips, line loops).
- */
- radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
- rs->pa_sc_line_stipple |
- S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
+ if (rast_prim != sctx->last_rast_prim &&
+ (sctx->ngg || sctx->gs_shader.cso)) {
+ unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
+ radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
+ sctx->context_roll = true;
+
+ if (sctx->chip_class >= GFX10) {
+ sctx->current_vs_state &= C_VS_STATE_OUTPRIM;
+ sctx->current_vs_state |= S_VS_STATE_OUTPRIM(gs_out);
+ }
+ }
sctx->last_rast_prim = rast_prim;
sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
- sctx->context_roll = true;
}
static void si_emit_vs_state(struct si_context *sctx,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index be9ab3bcdd6..53c1e7f44e8 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -622,30 +622,6 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader, pm4);
}
-static unsigned si_conv_prim_to_gs_out(unsigned mode)
-{
- static const int prim_conv[] = {
- [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
- [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
- [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
- [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
- [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
- [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
- [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
- };
- assert(mode < ARRAY_SIZE(prim_conv));
-
- return prim_conv[mode];
-}
-
void gfx9_get_gs_info(struct si_shader_selector *es,
struct si_shader_selector *gs,
struct gfx9_gs_info *out)
@@ -753,14 +729,12 @@ static void si_emit_shader_gs(struct si_context *sctx)
return;
/* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2
- * R_028A68_VGT_GSVS_RING_OFFSET_3, R_028A6C_VGT_GS_OUT_PRIM_TYPE */
- radeon_opt_set_context_reg4(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1,
+ * R_028A68_VGT_GSVS_RING_OFFSET_3 */
+ radeon_opt_set_context_reg3(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1,
SI_TRACKED_VGT_GSVS_RING_OFFSET_1,
shader->ctx_reg.gs.vgt_gsvs_ring_offset_1,
shader->ctx_reg.gs.vgt_gsvs_ring_offset_2,
- shader->ctx_reg.gs.vgt_gsvs_ring_offset_3,
- shader->ctx_reg.gs.vgt_gs_out_prim_type);
-
+ shader->ctx_reg.gs.vgt_gsvs_ring_offset_3);
/* R_028AB0_VGT_GSVS_RING_ITEMSIZE */
radeon_opt_set_context_reg(sctx, R_028AB0_VGT_GSVS_RING_ITEMSIZE,
@@ -841,9 +815,6 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
offset += num_components[2] * sel->gs_max_out_vertices;
shader->ctx_reg.gs.vgt_gsvs_ring_offset_3 = offset;
- shader->ctx_reg.gs.vgt_gs_out_prim_type =
- si_conv_prim_to_gs_out(sel->gs_output_prim);
-
if (max_stream >= 3)
offset += num_components[3] * sel->gs_max_out_vertices;
shader->ctx_reg.gs.vgt_gsvs_ring_itemsize = offset;