summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c11
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c67
2 files changed, 23 insertions, 55 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c58467ddcb0..e180fc89d51 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6761,6 +6761,16 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
return true;
}
+static void si_fix_num_sgprs(struct si_shader *shader)
+{
+ unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
+
+ if (shader->config.scratch_bytes_per_wave)
+ min_sgprs += 1; /* scratch wave offset */
+
+ shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
+}
+
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader,
struct pipe_debug_callback *debug)
@@ -6850,6 +6860,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
}
}
+ si_fix_num_sgprs(shader);
si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
stderr);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index b7ebb48e6a9..1ce7ecc2cf3 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -306,7 +306,7 @@ static void si_set_tesseval_regs(struct si_shader *shader,
static void si_shader_ls(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
@@ -322,18 +322,12 @@ static void si_shader_ls(struct si_shader *shader)
vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
num_user_sgprs = SI_LS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- if (num_user_sgprs > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2;
- }
- assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B528_SGPRS((num_sgprs - 1) / 8) |
+ S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B528_DX10_CLAMP(1) |
S_00B528_FLOAT_MODE(shader->config.float_mode);
@@ -344,7 +338,7 @@ static void si_shader_ls(struct si_shader *shader)
static void si_shader_hs(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
uint64_t va;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -355,20 +349,12 @@ static void si_shader_hs(struct si_shader *shader)
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
num_user_sgprs = SI_TCS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with tessellation factor
- * buffer offset. */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B428_SGPRS((num_sgprs - 1) / 8) |
+ S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B428_DX10_CLAMP(1) |
S_00B428_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
@@ -379,7 +365,7 @@ static void si_shader_hs(struct si_shader *shader)
static void si_shader_es(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
@@ -400,21 +386,13 @@ static void si_shader_es(struct si_shader *shader)
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
shader->selector->esgs_itemsize / 4);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B328_SGPRS((num_sgprs - 1) / 8) |
+ S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B328_DX10_CLAMP(1) |
S_00B328_FLOAT_MODE(shader->config.float_mode));
@@ -458,7 +436,7 @@ static void si_shader_gs(struct si_shader *shader)
unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
uint64_t va;
unsigned max_stream = shader->selector->max_gs_stream;
@@ -495,17 +473,10 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
num_user_sgprs = SI_GS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
- if ((num_user_sgprs + 2) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2 + 2;
- }
- assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B228_SGPRS((num_sgprs - 1) / 8) |
+ S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B228_DX10_CLAMP(1) |
S_00B228_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
@@ -523,7 +494,7 @@ static void si_shader_gs(struct si_shader *shader)
static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned nparams, vgpr_comp_cnt;
uint64_t va;
unsigned window_space =
@@ -566,13 +537,6 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->config.num_sgprs;
- if (num_user_sgprs > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2;
- }
- assert(num_sgprs <= 104);
-
/* VS is required to export at least one param. */
nparams = MAX2(shader->info.nr_param_exports, 1);
si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
@@ -594,7 +558,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B128_SGPRS((num_sgprs - 1) / 8) |
+ S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B128_DX10_CLAMP(1) |
S_00B128_FLOAT_MODE(shader->config.float_mode));
@@ -684,7 +648,7 @@ static void si_shader_ps(struct si_shader *shader)
struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
bool has_centroid;
@@ -772,17 +736,10 @@ static void si_shader_ps(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
num_user_sgprs = SI_PS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B028_SGPRS((num_sgprs - 1) / 8) |
+ S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B028_DX10_CLAMP(1) |
S_00B028_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,