summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/radeonsi/si_descriptors.c 14
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 16
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 9
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 34
4 files changed, 53 insertions, 20 deletions
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index d1864d0cf82..c497c2ff147 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2127,9 +2127,19 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx,
if (sctx->vertex_buffer_pointer_dirty) {
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
- unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] +
- SI_SGPR_VERTEX_BUFFERS * 4;
+ /* Find the location of the VB descriptor pointer. */
+ /* TODO: In the future, the pointer will be packed in unused
+ * bits of the first 2 VB descriptors. */
+ unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR;
+ if (sctx->b.chip_class >= GFX9) {
+ if (sctx->tes_shader.cso)
+ sh_dw_offset = GFX9_TCS_NUM_USER_SGPR;
+ else if (sctx->gs_shader.cso)
+ sh_dw_offset = GFX9_GS_NUM_USER_SGPR;
+ }
+
+ unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + sh_dw_offset * 4;
si_emit_shader_pointer_head(cs, sh_offset, 1);
si_emit_shader_pointer_body(sctx->screen, cs,
sctx->vb_descriptors_buffer->gpu_address +
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 54f90fcac72..1f5af71653a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4558,8 +4558,6 @@ static void declare_global_desc_pointers(struct si_shader_context *ctx,
static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
struct si_function_info *fninfo)
{
- ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
- ac_array_in_const32_addr_space(ctx->v4i32));
add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
@@ -4661,6 +4659,8 @@ static void create_function(struct si_shader_context *ctx)
declare_per_stage_desc_pointers(ctx, &fninfo, true);
declare_vs_specific_input_sgprs(ctx, &fninfo);
+ ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
+ ac_array_in_const32_addr_space(ctx->v4i32));
if (shader->key.as_es) {
ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
@@ -4733,6 +4733,10 @@ static void create_function(struct si_shader_context *ctx)
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
+ ac_array_in_const32_addr_space(ctx->v4i32));
/* VGPRs (first TCS, then VS) */
add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id);
@@ -4790,9 +4794,6 @@ static void create_function(struct si_shader_context *ctx)
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
- if (!HAVE_32BIT_POINTERS)
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
}
@@ -4800,6 +4801,10 @@ static void create_function(struct si_shader_context *ctx)
declare_samplers_and_images(ctx, &fninfo,
ctx->type == PIPE_SHADER_GEOMETRY);
}
+ if (ctx->type == PIPE_SHADER_VERTEX) {
+ ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
+ ac_array_in_const32_addr_space(ctx->v4i32));
+ }
/* VGPRs (first GS, then VS/TES) */
ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
@@ -7324,7 +7329,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
- add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1b1f650f869..471f2e9f589 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -169,11 +169,7 @@ enum {
SI_NUM_RESOURCE_SGPRS,
/* all VS variants */
- SI_SGPR_VERTEX_BUFFERS = SI_NUM_RESOURCE_SGPRS,
-#if !HAVE_32BIT_POINTERS
- SI_SGPR_VERTEX_BUFFERS_HI,
-#endif
- SI_SGPR_BASE_VERTEX,
+ SI_SGPR_BASE_VERTEX = SI_NUM_RESOURCE_SGPRS,
SI_SGPR_START_INSTANCE,
SI_SGPR_DRAWID,
SI_SGPR_VS_STATE_BITS,
@@ -209,6 +205,9 @@ enum {
GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR,
GFX9_SGPR_TCS_OUT_OFFSETS,
GFX9_SGPR_TCS_OUT_LAYOUT,
+#if !HAVE_32BIT_POINTERS
+ GFX9_SGPR_align_for_vb_pointer,
+#endif
GFX9_TCS_NUM_USER_SGPR,
/* GS limits */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ad38a0f31b6..3643ba500b6 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -459,6 +459,17 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
return shader->pm4;
}
+static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
+{
+ /* Add the pointer to VBO descriptors to the given user SGPR count: 1 more SGPR if HAVE_32BIT_POINTERS, else 2 more on top of a base count that is asserted to be even. */
+ if (HAVE_32BIT_POINTERS) {
+ return num_always_on_user_sgprs + 1;
+ } else {
+ assert(num_always_on_user_sgprs % 2 == 0);
+ return num_always_on_user_sgprs + 2;
+ }
+}
+
static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
{
struct si_pm4_state *pm4;
@@ -488,7 +499,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B528_DX10_CLAMP(1) |
S_00B528_FLOAT_MODE(shader->config.float_mode);
- shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_VS_NUM_USER_SGPR) |
+ shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR)) |
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
}
@@ -515,9 +526,12 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
*/
ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
+ unsigned num_user_sgprs =
+ si_get_num_vs_user_sgprs(GFX9_TCS_NUM_USER_SGPR);
+
shader->config.rsrc2 =
- S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) |
- S_00B42C_USER_SGPR_MSB(GFX9_TCS_NUM_USER_SGPR >> 5) |
+ S_00B42C_USER_SGPR(num_user_sgprs) |
+ S_00B42C_USER_SGPR_MSB(num_user_sgprs >> 5) |
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
} else {
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
@@ -562,7 +576,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
if (shader->selector->type == PIPE_SHADER_VERTEX) {
/* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0;
- num_user_sgprs = SI_VS_NUM_USER_SGPR;
+ num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR);
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2;
num_user_sgprs = SI_TES_NUM_USER_SGPR;
@@ -769,6 +783,12 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
else
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
+ unsigned num_user_sgprs;
+ if (es_type == PIPE_SHADER_VERTEX)
+ num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_GS_NUM_USER_SGPR);
+ else
+ num_user_sgprs = GFX9_GS_NUM_USER_SGPR;
+
gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info);
si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
@@ -781,8 +801,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
S_00B228_FLOAT_MODE(shader->config.float_mode) |
S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt));
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
- S_00B22C_USER_SGPR(GFX9_GS_NUM_USER_SGPR) |
- S_00B22C_USER_SGPR_MSB(GFX9_GS_NUM_USER_SGPR >> 5) |
+ S_00B22C_USER_SGPR(num_user_sgprs) |
+ S_00B22C_USER_SGPR_MSB(num_user_sgprs >> 5) |
S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
S_00B22C_OC_LDS_EN(es_type == PIPE_SHADER_TESS_EVAL) |
S_00B22C_LDS_SIZE(gs_info.lds_size) |
@@ -887,7 +907,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
num_user_sgprs = SI_SGPR_VS_BLIT_DATA +
info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
} else {
- num_user_sgprs = SI_VS_NUM_USER_SGPR;
+ num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR);
}
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = enable_prim_id ? 3 : 2;