From ffe44d0283e2a987d2083b75e045316ac2898c40 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 13 Apr 2016 14:15:16 +0200 Subject: radeonsi: fold num_user_sgprs where it is possible Reviewed-by: Bas Nieuwenhuizen --- src/gallium/drivers/radeonsi/si_state_shaders.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 1ce7ecc2cf3..d560aae1f7d 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -306,7 +306,6 @@ static void si_set_tesseval_regs(struct si_shader *shader, static void si_shader_ls(struct si_shader *shader) { struct si_pm4_state *pm4; - unsigned num_user_sgprs; unsigned vgpr_comp_cnt; uint64_t va; @@ -321,8 +320,6 @@ static void si_shader_ls(struct si_shader *shader) * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */ vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; - num_user_sgprs = SI_LS_NUM_USER_SGPR; - si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40); @@ -331,14 +328,13 @@ static void si_shader_ls(struct si_shader *shader) S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B528_DX10_CLAMP(1) | S_00B528_FLOAT_MODE(shader->config.float_mode); - shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) | + shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) | S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } static void si_shader_hs(struct si_shader *shader) { struct si_pm4_state *pm4; - unsigned num_user_sgprs; uint64_t va; pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state); @@ -348,8 +344,6 @@ static void si_shader_hs(struct si_shader *shader) va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); - num_user_sgprs = SI_TCS_NUM_USER_SGPR; - si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40); si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS, @@ -358,7 +352,7 @@ static void si_shader_hs(struct si_shader *shader) S_00B428_DX10_CLAMP(1) | S_00B428_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, - S_00B42C_USER_SGPR(num_user_sgprs) | + S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } @@ -436,7 +430,6 @@ static void si_shader_gs(struct si_shader *shader) unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2; unsigned gs_num_invocations = shader->selector->gs_num_invocations; struct si_pm4_state *pm4; - unsigned num_user_sgprs; uint64_t va; unsigned max_stream = shader->selector->max_gs_stream; @@ -472,15 +465,13 @@ static void si_shader_gs(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40); - num_user_sgprs = SI_GS_NUM_USER_SGPR; - si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B228_DX10_CLAMP(1) | S_00B228_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, - S_00B22C_USER_SGPR(num_user_sgprs) | + S_00B22C_USER_SGPR(SI_GS_NUM_USER_SGPR) | S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } @@ -648,7 +639,6 @@ static void si_shader_ps(struct si_shader *shader) struct tgsi_shader_info *info = &shader->selector->info; struct si_pm4_state *pm4; unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask; - unsigned num_user_sgprs; unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); uint64_t va; bool has_centroid; @@ -735,8 +725,6 @@ static void si_shader_ps(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40); - num_user_sgprs = SI_PS_NUM_USER_SGPR; - si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) | @@ -744,7 +732,7 @@ static void si_shader_ps(struct si_shader *shader) S_00B028_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) | - S_00B02C_USER_SGPR(num_user_sgprs) | + S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); /* Prefer RE_Z if the shader is complex enough. The requirement is either: -- cgit v1.2.3