summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2016-01-20 00:01:31 +0100
committerMarek Olšák <[email protected]>2016-01-22 22:05:42 +0100
commit5944f3d2fc8ee3d0b9865865443d2ecfdb19bd1a (patch)
treed01cd84363a6b8a0e0d4eb76a08fbf421efc5e8a
parent97648229e4005111a553b78df27dc131f11a8277 (diff)
radeonsi: enable late VS allocation (v3)
v2: take the number of CUs into account v3: change in LS allocation Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c26
1 files changed, 23 insertions, 3 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 76d5017a82d..c010998feab 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3729,12 +3729,32 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
if (sctx->b.chip_class >= CIK) {
- si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
- si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
- si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
+
+ if (sscreen->b.info.num_good_compute_units /
+ (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
+ /* Too few available compute units per SH. Disallowing
+ * VS to run on CU0 could hurt us more than late VS
+ * allocation would help.
+ *
+ * LATE_ALLOC_VS = 2 is the highest safe number.
+ */
+ si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
+ si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
+ si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
+ } else {
+ /* Set LATE_ALLOC_VS == 31. It should be less than
+ * the number of scratch waves. Limitations:
+ * - VS can't execute on CU0.
+ * - If HS writes outputs to LDS, LS can't execute on CU0.
+ */
+ si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
+ si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
+ si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
+ }
+
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
}