summaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
authorAndres Rodriguez <[email protected]>2017-10-20 18:02:12 -0400
committerBas Nieuwenhuizen <[email protected]>2017-10-21 01:01:44 +0200
commit986c4b0bd4eb5115d656ccbc0351bd824ff7e413 (patch)
tree38a8bbd418377dbee9d56c3f63c606fc1369d246 /src/amd
parentb7c2f7065691aa6523b932e809f22e2a3630ba5a (diff)
radv: hardcode shader WAVE_LIMIT to the maximum value
When WAVE_LIMIT is set, a submission will opt-in for SPI based resource scheduling. Because this mechanism is cooperative, we must ensure that all submissions have this field set, otherwise they will bypass resource arbitration. We always hardcode the field to its maximum value, instead of attempting to calculate an approximate usage. In testing, there were no benefits to using anything other than the maximum. Signed-off-by: Andres Rodriguez <[email protected]> Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r--src/amd/vulkan/si_cmd_buffer.c27
1 files changed, 18 insertions, 9 deletions
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index de3b3883fc8..ac3dff8e9eb 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -179,7 +179,8 @@ si_emit_compute(struct radv_physical_device *physical_device,
radeon_emit(cs, 0);
radeon_emit(cs, 0);
- radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 3);
+ radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
+ S_00B854_WAVES_PER_SH(0x3));
radeon_emit(cs, 0);
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
@@ -432,11 +433,15 @@ si_emit_config(struct radv_physical_device *physical_device,
if (physical_device->rad_info.chip_class >= CIK) {
if (physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
} else {
- radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
- radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
- radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+ S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ S_00B41C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+ S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
* but we don't use on-chip GS.
@@ -445,7 +450,8 @@ si_emit_config(struct radv_physical_device *physical_device,
S_028A44_ES_VERTS_PER_SUBGRP(64) |
S_028A44_GS_PRIMS_PER_SUBGRP(4));
}
- radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+ S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
if (physical_device->rad_info.num_good_compute_units /
(physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) {
@@ -455,7 +461,8 @@ si_emit_config(struct radv_physical_device *physical_device,
*
* LATE_ALLOC_VS = 2 is the highest safe number.
*/
- radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+ S_00B118_CU_EN(0xffff) | S_00B118_WAVE_LIMIT(0x3F) );
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
} else {
/* Set LATE_ALLOC_VS == 31. It should be less than
@@ -463,11 +470,13 @@ si_emit_config(struct radv_physical_device *physical_device,
* - VS can't execute on CU0.
* - If HS writes outputs to LDS, LS can't execute on CU0.
*/
- radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
+ radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+ S_00B118_CU_EN(0xfffe) | S_00B118_WAVE_LIMIT(0x3F));
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
}
- radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+ S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
}
if (physical_device->rad_info.chip_class >= VI) {