diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 67 |
2 files changed, 23 insertions, 55 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c58467ddcb0..e180fc89d51 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -6761,6 +6761,16 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, return true; } +static void si_fix_num_sgprs(struct si_shader *shader) +{ + unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */ + + if (shader->config.scratch_bytes_per_wave) + min_sgprs += 1; /* scratch wave offset */ + + shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs); +} + int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *shader, struct pipe_debug_callback *debug) @@ -6850,6 +6860,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, } } + si_fix_num_sgprs(shader); si_shader_dump(sscreen, shader, debug, shader->selector->info.processor, stderr); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b7ebb48e6a9..1ce7ecc2cf3 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -306,7 +306,7 @@ static void si_set_tesseval_regs(struct si_shader *shader, static void si_shader_ls(struct si_shader *shader) { struct si_pm4_state *pm4; - unsigned num_sgprs, num_user_sgprs; + unsigned num_user_sgprs; unsigned vgpr_comp_cnt; uint64_t va; @@ -322,18 +322,12 @@ static void si_shader_ls(struct si_shader *shader) vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; num_user_sgprs = SI_LS_NUM_USER_SGPR; - num_sgprs = shader->config.num_sgprs; - if (num_user_sgprs > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 2; - } - assert(num_sgprs <= 104); si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40); shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B528_SGPRS((num_sgprs - 1) / 8) | + S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B528_DX10_CLAMP(1) | S_00B528_FLOAT_MODE(shader->config.float_mode); @@ -344,7 +338,7 @@ static void si_shader_ls(struct si_shader *shader) static void si_shader_hs(struct si_shader *shader) { struct si_pm4_state *pm4; - unsigned num_sgprs, num_user_sgprs; + unsigned num_user_sgprs; uint64_t va; pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state); @@ -355,20 +349,12 @@ static void si_shader_hs(struct si_shader *shader) si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); num_user_sgprs = SI_TCS_NUM_USER_SGPR; - num_sgprs = shader->config.num_sgprs; - /* One SGPR after user SGPRs is pre-loaded with tessellation factor - * buffer offset. */ - if ((num_user_sgprs + 1) > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 1 + 2; - } - assert(num_sgprs <= 104); si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40); si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS, S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B428_SGPRS((num_sgprs - 1) / 8) | + S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B428_DX10_CLAMP(1) | S_00B428_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, @@ -379,7 +365,7 @@ static void si_shader_hs(struct si_shader *shader) static void si_shader_es(struct si_shader *shader) { struct si_pm4_state *pm4; - unsigned num_sgprs, num_user_sgprs; + unsigned num_user_sgprs; unsigned vgpr_comp_cnt; uint64_t va; @@ -400,21 +386,13 @@ static void si_shader_es(struct si_shader *shader) } else unreachable("invalid shader selector type"); - num_sgprs = shader->config.num_sgprs; - /* One SGPR after user SGPRs is pre-loaded with es2gs_offset */ - if ((num_user_sgprs + 1) > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 1 + 2; - } - assert(num_sgprs <= 104); - si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, shader->selector->esgs_itemsize / 4); si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40); si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES, S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B328_SGPRS((num_sgprs - 1) / 8) | + S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B328_DX10_CLAMP(1) | S_00B328_FLOAT_MODE(shader->config.float_mode)); @@ -458,7 +436,7 @@ static void si_shader_gs(struct si_shader *shader) unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2; unsigned gs_num_invocations = shader->selector->gs_num_invocations; struct si_pm4_state *pm4; - unsigned num_sgprs, num_user_sgprs; + unsigned num_user_sgprs; uint64_t va; unsigned max_stream = shader->selector->max_gs_stream; @@ -495,17 +473,10 @@ static void si_shader_gs(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40); num_user_sgprs = SI_GS_NUM_USER_SGPR; - num_sgprs = shader->config.num_sgprs; - /* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */ - if ((num_user_sgprs + 2) > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 2 + 2; - } - assert(num_sgprs <= 104); si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B228_SGPRS((num_sgprs - 1) / 8) | + S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B228_DX10_CLAMP(1) | S_00B228_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, @@ -523,7 +494,7 @@ static void si_shader_gs(struct si_shader *shader) static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) { struct si_pm4_state *pm4; - unsigned num_sgprs, num_user_sgprs; + unsigned num_user_sgprs; unsigned nparams, vgpr_comp_cnt; uint64_t va; unsigned window_space = @@ -566,13 +537,6 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) } else unreachable("invalid shader selector type"); - num_sgprs = shader->config.num_sgprs; - if (num_user_sgprs > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 2; - } - assert(num_sgprs <= 104); - /* VS is required to export at least one param. */ nparams = MAX2(shader->info.nr_param_exports, 1); si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG, @@ -594,7 +558,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40); si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B128_SGPRS((num_sgprs - 1) / 8) | + S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B128_DX10_CLAMP(1) | S_00B128_FLOAT_MODE(shader->config.float_mode)); @@ -684,7 +648,7 @@ static void si_shader_ps(struct si_shader *shader) struct tgsi_shader_info *info = &shader->selector->info; struct si_pm4_state *pm4; unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask; - unsigned num_sgprs, num_user_sgprs; + unsigned num_user_sgprs; unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); uint64_t va; bool has_centroid; @@ -772,17 +736,10 @@ static void si_shader_ps(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40); num_user_sgprs = SI_PS_NUM_USER_SGPR; - num_sgprs = shader->config.num_sgprs; - /* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */ - if ((num_user_sgprs + 1) > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 1 + 2; - } - assert(num_sgprs <= 104); si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B028_SGPRS((num_sgprs - 1) / 8) | + S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B028_DX10_CLAMP(1) | S_00B028_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, |