diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 34 |
4 files changed, 53 insertions, 20 deletions
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index d1864d0cf82..c497c2ff147 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2127,9 +2127,19 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx, if (sctx->vertex_buffer_pointer_dirty) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; - unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + - SI_SGPR_VERTEX_BUFFERS * 4; + /* Find the location of the VB descriptor pointer. */ + /* TODO: In the future, the pointer will be packed in unused + * bits of the first 2 VB descriptors. */ + unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR; + if (sctx->b.chip_class >= GFX9) { + if (sctx->tes_shader.cso) + sh_dw_offset = GFX9_TCS_NUM_USER_SGPR; + else if (sctx->gs_shader.cso) + sh_dw_offset = GFX9_GS_NUM_USER_SGPR; + } + + unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + sh_dw_offset * 4; si_emit_shader_pointer_head(cs, sh_offset, 1); si_emit_shader_pointer_body(sctx->screen, cs, sctx->vb_descriptors_buffer->gpu_address + diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 54f90fcac72..1f5af71653a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4558,8 +4558,6 @@ static void declare_global_desc_pointers(struct si_shader_context *ctx, static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx, struct si_function_info *fninfo) { - ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR, - ac_array_in_const32_addr_space(ctx->v4i32)); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id); @@ -4661,6 +4659,8 @@ static void create_function(struct si_shader_context *ctx) declare_per_stage_desc_pointers(ctx, &fninfo, true); declare_vs_specific_input_sgprs(ctx, &fninfo); + ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, + ac_array_in_const32_addr_space(ctx->v4i32)); if (shader->key.as_es) { ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4733,6 +4733,10 @@ static void create_function(struct si_shader_context *ctx) ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); + if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, + ac_array_in_const32_addr_space(ctx->v4i32)); /* VGPRs (first TCS, then VS) */ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id); @@ -4790,9 +4794,6 @@ static void create_function(struct si_shader_context *ctx) ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - if (!HAVE_32BIT_POINTERS) - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ } @@ -4800,6 +4801,10 @@ static void create_function(struct si_shader_context *ctx) declare_samplers_and_images(ctx, &fninfo, ctx->type == PIPE_SHADER_GEOMETRY); } + if (ctx->type == PIPE_SHADER_VERTEX) { + ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, + ac_array_in_const32_addr_space(ctx->v4i32)); + } /* VGPRs (first GS, then VS/TES) */ ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); @@ -7324,7 +7329,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); - add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1b1f650f869..471f2e9f589 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -169,11 +169,7 @@ enum { SI_NUM_RESOURCE_SGPRS, /* all VS variants */ - SI_SGPR_VERTEX_BUFFERS = SI_NUM_RESOURCE_SGPRS, -#if !HAVE_32BIT_POINTERS - SI_SGPR_VERTEX_BUFFERS_HI, -#endif - SI_SGPR_BASE_VERTEX, + SI_SGPR_BASE_VERTEX = SI_NUM_RESOURCE_SGPRS, SI_SGPR_START_INSTANCE, SI_SGPR_DRAWID, SI_SGPR_VS_STATE_BITS, @@ -209,6 +205,9 @@ enum { GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR, GFX9_SGPR_TCS_OUT_OFFSETS, GFX9_SGPR_TCS_OUT_LAYOUT, +#if !HAVE_32BIT_POINTERS + GFX9_SGPR_align_for_vb_pointer, +#endif GFX9_TCS_NUM_USER_SGPR, /* GS limits */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index ad38a0f31b6..3643ba500b6 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -459,6 +459,17 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader) return shader->pm4; } +static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs) +{ + /* Add the pointer to VBO descriptors. */ + if (HAVE_32BIT_POINTERS) { + return num_always_on_user_sgprs + 1; + } else { + assert(num_always_on_user_sgprs % 2 == 0); + return num_always_on_user_sgprs + 2; + } +} + static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) { struct si_pm4_state *pm4; @@ -488,7 +499,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B528_DX10_CLAMP(1) | S_00B528_FLOAT_MODE(shader->config.float_mode); - shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_VS_NUM_USER_SGPR) | + shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR)) | S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } @@ -515,9 +526,12 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) */ ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1; + unsigned num_user_sgprs = + si_get_num_vs_user_sgprs(GFX9_TCS_NUM_USER_SGPR); + shader->config.rsrc2 = - S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) | - S_00B42C_USER_SGPR_MSB(GFX9_TCS_NUM_USER_SGPR >> 5) | + S_00B42C_USER_SGPR(num_user_sgprs) | + S_00B42C_USER_SGPR_MSB(num_user_sgprs >> 5) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } else { si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); @@ -562,7 +576,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) if (shader->selector->type == PIPE_SHADER_VERTEX) { /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0; - num_user_sgprs = SI_VS_NUM_USER_SGPR; + num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR); } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2; num_user_sgprs = SI_TES_NUM_USER_SGPR; @@ -769,6 +783,12 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) else gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */ + unsigned num_user_sgprs; + if (es_type == PIPE_SHADER_VERTEX) + num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_GS_NUM_USER_SGPR); + else + num_user_sgprs = GFX9_GS_NUM_USER_SGPR; + gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info); si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8); @@ -781,8 +801,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) S_00B228_FLOAT_MODE(shader->config.float_mode) | S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt)); si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, - S_00B22C_USER_SGPR(GFX9_GS_NUM_USER_SGPR) | - S_00B22C_USER_SGPR_MSB(GFX9_GS_NUM_USER_SGPR >> 5) | + S_00B22C_USER_SGPR(num_user_sgprs) | + S_00B22C_USER_SGPR_MSB(num_user_sgprs >> 5) | S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_OC_LDS_EN(es_type == PIPE_SHADER_TESS_EVAL) | S_00B22C_LDS_SIZE(gs_info.lds_size) | @@ -887,7 +907,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, num_user_sgprs = SI_SGPR_VS_BLIT_DATA + info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]; } else { - num_user_sgprs = SI_VS_NUM_USER_SGPR; + num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR); } } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = enable_prim_id ? 3 : 2; |