diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_gfx_cs.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 20 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 40 |
5 files changed, 59 insertions, 18 deletions
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index f34286ecaec..5df6e080792 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -464,6 +464,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->index_ring_offset = 0; + STATIC_ASSERT(SI_NUM_TRACKED_REGS <= sizeof(ctx->tracked_regs.reg_saved) * 8); + if (has_clear_state) { ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0x00000000; @@ -480,7 +482,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000; - ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__VS] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__CL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003; ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1610cddac3c..8b5c70068e4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -709,6 +709,7 @@ struct si_shader { /*For save precompute registers value */ unsigned vgt_tf_param; /* VGT_TF_PARAM */ unsigned vgt_vertex_reuse_block_cntl; /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + unsigned pa_cl_vs_out_cntl; unsigned ge_cntl; }; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 8c47002c99f..86ca4bac57a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -801,12 +801,20 @@ static void si_emit_clip_regs(struct si_context *sctx) culldist_mask |= clipdist_mask; unsigned initial_cdw = sctx->gfx_cs->current.cdw; - radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, - SI_TRACKED_PA_CL_VS_OUT_CNTL, - vs_sel->pa_cl_vs_out_cntl | - S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | - clipdist_mask | (culldist_mask << 8)); + unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | + S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | + clipdist_mask | (culldist_mask << 8); + + if (sctx->chip_class >= GFX10) { + radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, + pa_cl_cntl, + ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); + } else { + radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, + vs_sel->pa_cl_vs_out_cntl | pa_cl_cntl); + } radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, rs->pa_cl_clip_cntl | diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 3480c4f7eeb..e3e6cf293e1 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -259,6 +259,14 @@ struct si_shader_data { uint32_t sh_base[SI_NUM_SHADERS]; }; +#define SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK \ + (S_02881C_USE_VTX_POINT_SIZE(1) | \ + S_02881C_USE_VTX_EDGE_FLAG(1) | \ + S_02881C_USE_VTX_RENDER_TARGET_INDX(1) | \ + S_02881C_USE_VTX_VIEWPORT_INDX(1) | \ + S_02881C_VS_OUT_MISC_VEC_ENA(1) | \ + S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1)) + /* The list of registers whose emitted values are remembered by si_context. */ enum si_tracked_reg { SI_TRACKED_DB_RENDER_CONTROL, /* 2 consecutive registers */ @@ -283,7 +291,8 @@ enum si_tracked_reg { SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, - SI_TRACKED_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, /* set with SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK*/ + SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, /* set with ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK */ SI_TRACKED_PA_CL_CLIP_CNTL, SI_TRACKED_PA_SC_BINNER_CNTL_0, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 6aba3f39ad4..a1ec9884ddf 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1007,6 +1007,11 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, SI_TRACKED_PA_CL_NGG_CNTL, shader->ctx_reg.ngg.pa_cl_ngg_cntl); + radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, + shader->pa_cl_vs_out_cntl, + SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); + if (initial_cdw != sctx->gfx_cs->current.cdw) sctx->context_roll = true; } @@ -1087,6 +1092,19 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs) return PIPE_PRIM_TRIANGLES; /* worst case for all callers */ } +static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, bool ngg) +{ + bool misc_vec_ena = + sel->info.writes_psize || (sel->info.writes_edgeflag && !ngg) || + sel->info.writes_layer || sel->info.writes_viewport_index; + return S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) | + S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) | + S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) | + S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) | + S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | + S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena); +} + /** * Prepare the PM4 image for \p shader, which will run as a merged ESGS shader * in NGG mode. @@ -1232,6 +1250,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader */ shader->ctx_reg.ngg.pa_cl_ngg_cntl = S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX); + shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true); shader->ge_cntl = S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) | @@ -1323,6 +1342,13 @@ static void si_emit_shader_vs(struct si_context *sctx) S_028A44_GS_PRIMS_PER_SUBGRP(126) | S_028A44_GS_INST_PRIMS_IN_SUBGRP(126)); } + + if (sctx->chip_class >= GFX10) { + radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, + shader->pa_cl_vs_out_cntl, + SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); + } } /** @@ -1427,6 +1453,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE); + shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false); oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0; @@ -2856,16 +2883,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx, } /* PA_CL_VS_OUT_CNTL */ - bool misc_vec_ena = - sel->info.writes_psize || sel->info.writes_edgeflag || - sel->info.writes_layer || sel->info.writes_viewport_index; - sel->pa_cl_vs_out_cntl = - S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) | - S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag) | - S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) | - S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) | - S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | - S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena); + if (sctx->chip_class <= GFX9) + sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false); + sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS : sel->info.clipdist_writemask; sel->culldist_mask = sel->info.culldist_writemask << |