summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c20
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c1
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c24
4 files changed, 33 insertions, 13 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 6c57fa2a75a..59abdb755e1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5846,9 +5846,14 @@ static void declare_vs_input_vgprs(struct si_shader_context *ctx,
struct si_shader *shader = ctx->shader;
params[ctx->param_vertex_id = (*num_params)++] = ctx->i32;
- params[ctx->param_rel_auto_id = (*num_params)++] = ctx->i32;
- params[ctx->param_vs_prim_id = (*num_params)++] = ctx->i32;
- params[ctx->param_instance_id = (*num_params)++] = ctx->i32;
+ if (shader->key.as_ls) {
+ params[ctx->param_rel_auto_id = (*num_params)++] = ctx->i32;
+ params[ctx->param_instance_id = (*num_params)++] = ctx->i32;
+ } else {
+ params[ctx->param_instance_id = (*num_params)++] = ctx->i32;
+ params[ctx->param_vs_prim_id = (*num_params)++] = ctx->i32;
+ }
+ params[(*num_params)++] = ctx->i32; /* unused */
if (!shader->is_gs_copy_shader) {
/* Vertex load indices. */
@@ -7368,11 +7373,14 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
key->vs_prolog.states = *prolog_key;
key->vs_prolog.num_input_sgprs = num_input_sgprs;
key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
+ key->vs_prolog.as_ls = shader_out->key.as_ls;
- if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL)
+ if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) {
+ key->vs_prolog.as_ls = 1;
key->vs_prolog.num_merged_next_stage_vgprs = 2;
- else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY)
+ } else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
key->vs_prolog.num_merged_next_stage_vgprs = 5;
+ }
/* Set the instanceID flag. */
for (unsigned i = 0; i < info->num_inputs; i++)
@@ -8379,7 +8387,7 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
ctx->param_vertex_id = first_vs_vgpr;
- ctx->param_instance_id = first_vs_vgpr + 3;
+ ctx->param_instance_id = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
params = alloca(num_all_input_regs * sizeof(LLVMTypeRef));
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index fc26c882701..1eb9c0bc29d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -437,6 +437,7 @@ union si_shader_part_key {
/* For merged stages such as LS-HS, HS input VGPRs are first. */
unsigned num_merged_next_stage_vgprs:3;
unsigned last_input:4;
+ unsigned as_ls:1;
/* Prologs for monolithic shaders shouldn't set EXEC. */
unsigned is_monolithic:1;
} vs_prolog;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 39494cc1a17..938e7fb06a9 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4341,6 +4341,7 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
+ si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (sctx->b.chip_class < CIK)
si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 6a6fce6939c..6b910778536 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -458,8 +458,10 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
/* We need at least 2 components for LS.
- * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
- vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
+ * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
+ * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
+ */
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
@@ -491,8 +493,10 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40);
/* We need at least 2 components for LS.
- * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
- ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
+ * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
+ * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
+ */
+ ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
if (shader->config.scratch_bytes_per_wave) {
fprintf(stderr, "HS: scratch buffer unsupported");
@@ -544,7 +548,8 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
+ vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0;
num_user_sgprs = SI_VS_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2;
@@ -759,7 +764,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
struct gfx9_gs_info gs_info;
if (es_type == PIPE_SHADER_VERTEX)
- es_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0;
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
+ es_vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0;
else if (es_type == PIPE_SHADER_TESS_EVAL)
es_vgpr_comp_cnt = shader->key.part.gs.es->info.uses_primid ? 3 : 2;
else
@@ -876,7 +882,11 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID)
+ * If PrimID is disabled. InstanceID / StepRate1 is loaded instead.
+ * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
+ */
+ vgpr_comp_cnt = enable_prim_id ? 2 : (shader->info.uses_instanceid ? 1 : 0);
num_user_sgprs = SI_VS_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2;