diff options
author | Marek Olšák <[email protected]> | 2016-01-20 00:01:31 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-01-22 22:05:42 +0100 |
commit | 5944f3d2fc8ee3d0b9865865443d2ecfdb19bd1a (patch) | |
tree | d01cd84363a6b8a0e0d4eb76a08fbf421efc5e8a | |
parent | 97648229e4005111a553b78df27dc131f11a8277 (diff) |
radeonsi: enable late VS allocation (v3)
v2: take the number of CUs into account
v3: change in LS allocation
Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 26 |
1 files changed, 23 insertions, 3 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 76d5017a82d..c010998feab 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3729,12 +3729,32 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); if (sctx->b.chip_class >= CIK) { - si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff)); si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); - si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); - si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0)); + + if (sscreen->b.info.num_good_compute_units / + (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) { + /* Too few available compute units per SH. Disallowing + * VS to run on CU0 could hurt us more than late VS + * allocation would help. + * + * LATE_ALLOC_VS = 2 is the highest safe number. + */ + si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); + si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); + si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); + } else { + /* Set LATE_ALLOC_VS == 31. It should be less than + * the number of scratch waves. Limitations: + * - VS can't execute on CU0. + * - If HS writes outputs to LDS, LS can't execute on CU0. + */ + si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe)); + si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe)); + si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); + } + si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); } |