summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/amd/vulkan/si_cmd_buffer.c27
1 files changed, 18 insertions, 9 deletions
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index de3b3883fc8..ac3dff8e9eb 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -179,7 +179,8 @@ si_emit_compute(struct radv_physical_device *physical_device,
radeon_emit(cs, 0);
radeon_emit(cs, 0);
- radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 3);
+ radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
+ S_00B854_WAVES_PER_SH(0x3));
radeon_emit(cs, 0);
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
@@ -432,11 +433,15 @@ si_emit_config(struct radv_physical_device *physical_device,
if (physical_device->rad_info.chip_class >= CIK) {
if (physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
} else {
- radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
- radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
- radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+ S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ S_00B41C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+ S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
* but we don't use on-chip GS.
@@ -445,7 +450,8 @@ si_emit_config(struct radv_physical_device *physical_device,
S_028A44_ES_VERTS_PER_SUBGRP(64) |
S_028A44_GS_PRIMS_PER_SUBGRP(4));
}
- radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+ S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
if (physical_device->rad_info.num_good_compute_units /
(physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) {
@@ -455,7 +461,8 @@ si_emit_config(struct radv_physical_device *physical_device,
*
* LATE_ALLOC_VS = 2 is the highest safe number.
*/
- radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+ S_00B118_CU_EN(0xffff) | S_00B118_WAVE_LIMIT(0x3F) );
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
} else {
/* Set LATE_ALLOC_VS == 31. It should be less than
@@ -463,11 +470,13 @@ si_emit_config(struct radv_physical_device *physical_device,
* - VS can't execute on CU0.
* - If HS writes outputs to LDS, LS can't execute on CU0.
*/
- radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
+ radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+ S_00B118_CU_EN(0xfffe) | S_00B118_WAVE_LIMIT(0x3F));
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
}
- radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+ S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
}
if (physical_device->rad_info.chip_class >= VI) {