aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/radeonsi/si_gfx_cs.c5
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c20
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h11
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c40
5 files changed, 59 insertions, 18 deletions
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index f34286ecaec..5df6e080792 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -464,6 +464,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->index_ring_offset = 0;
+ STATIC_ASSERT(SI_NUM_TRACKED_REGS <= sizeof(ctx->tracked_regs.reg_saved) * 8);
+
if (has_clear_state) {
ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0x00000000;
@@ -480,7 +482,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000;
- ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__VS] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__CL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1610cddac3c..8b5c70068e4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -709,6 +709,7 @@ struct si_shader {
/*For save precompute registers value */
unsigned vgt_tf_param; /* VGT_TF_PARAM */
unsigned vgt_vertex_reuse_block_cntl; /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+ unsigned pa_cl_vs_out_cntl;
unsigned ge_cntl;
};
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 8c47002c99f..86ca4bac57a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -801,12 +801,20 @@ static void si_emit_clip_regs(struct si_context *sctx)
culldist_mask |= clipdist_mask;
unsigned initial_cdw = sctx->gfx_cs->current.cdw;
- radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
- SI_TRACKED_PA_CL_VS_OUT_CNTL,
- vs_sel->pa_cl_vs_out_cntl |
- S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
- clipdist_mask | (culldist_mask << 8));
+ unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+ clipdist_mask | (culldist_mask << 8);
+
+ if (sctx->chip_class >= GFX10) {
+ radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
+ SI_TRACKED_PA_CL_VS_OUT_CNTL__CL,
+ pa_cl_cntl,
+ ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
+ } else {
+ radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
+ SI_TRACKED_PA_CL_VS_OUT_CNTL__CL,
+ vs_sel->pa_cl_vs_out_cntl | pa_cl_cntl);
+ }
radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL,
SI_TRACKED_PA_CL_CLIP_CNTL,
rs->pa_cl_clip_cntl |
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 3480c4f7eeb..e3e6cf293e1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -259,6 +259,14 @@ struct si_shader_data {
uint32_t sh_base[SI_NUM_SHADERS];
};
+#define SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK \
+ (S_02881C_USE_VTX_POINT_SIZE(1) | \
+ S_02881C_USE_VTX_EDGE_FLAG(1) | \
+ S_02881C_USE_VTX_RENDER_TARGET_INDX(1) | \
+ S_02881C_USE_VTX_VIEWPORT_INDX(1) | \
+ S_02881C_VS_OUT_MISC_VEC_ENA(1) | \
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1))
+
/* The list of registers whose emitted values are remembered by si_context. */
enum si_tracked_reg {
SI_TRACKED_DB_RENDER_CONTROL, /* 2 consecutive registers */
@@ -283,7 +291,8 @@ enum si_tracked_reg {
SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
- SI_TRACKED_PA_CL_VS_OUT_CNTL,
+ SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, /* set with SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK*/
+ SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, /* set with ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK */
SI_TRACKED_PA_CL_CLIP_CNTL,
SI_TRACKED_PA_SC_BINNER_CNTL_0,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 6aba3f39ad4..a1ec9884ddf 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1007,6 +1007,11 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx,
SI_TRACKED_PA_CL_NGG_CNTL,
shader->ctx_reg.ngg.pa_cl_ngg_cntl);
+ radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
+ SI_TRACKED_PA_CL_VS_OUT_CNTL__VS,
+ shader->pa_cl_vs_out_cntl,
+ SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
+
if (initial_cdw != sctx->gfx_cs->current.cdw)
sctx->context_roll = true;
}
@@ -1087,6 +1092,19 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs)
return PIPE_PRIM_TRIANGLES; /* worst case for all callers */
}
+static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, bool ngg)
+{
+ bool misc_vec_ena =
+ sel->info.writes_psize || (sel->info.writes_edgeflag && !ngg) ||
+ sel->info.writes_layer || sel->info.writes_viewport_index;
+ return S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) |
+ S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) |
+ S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena);
+}
+
/**
* Prepare the PM4 image for \p shader, which will run as a merged ESGS shader
* in NGG mode.
@@ -1232,6 +1250,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
*/
shader->ctx_reg.ngg.pa_cl_ngg_cntl =
S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX);
+ shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
shader->ge_cntl =
S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
@@ -1323,6 +1342,13 @@ static void si_emit_shader_vs(struct si_context *sctx)
S_028A44_GS_PRIMS_PER_SUBGRP(126) |
S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
}
+
+ if (sctx->chip_class >= GFX10) {
+ radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
+ SI_TRACKED_PA_CL_VS_OUT_CNTL__VS,
+ shader->pa_cl_vs_out_cntl,
+ SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
+ }
}
/**
@@ -1427,6 +1453,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ?
V_02870C_SPI_SHADER_4COMP :
V_02870C_SPI_SHADER_NONE);
+ shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false);
oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0;
@@ -2856,16 +2883,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
}
/* PA_CL_VS_OUT_CNTL */
- bool misc_vec_ena =
- sel->info.writes_psize || sel->info.writes_edgeflag ||
- sel->info.writes_layer || sel->info.writes_viewport_index;
- sel->pa_cl_vs_out_cntl =
- S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) |
- S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag) |
- S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
- S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
- S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena);
+ if (sctx->chip_class <= GFX9)
+ sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false);
+
sel->clipdist_mask = sel->info.writes_clipvertex ?
SIX_BITS : sel->info.clipdist_writemask;
sel->culldist_mask = sel->info.culldist_writemask <<