diff options
-rw-r--r-- | src/amd/common/ac_gpu_info.c | 8
-rw-r--r-- | src/amd/common/ac_gpu_info.h | 1
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 8
3 files changed, 10 insertions, 7 deletions
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index b8230fd03b1..667070cb2f0 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -496,6 +496,13 @@ bool ac_query_gpu_info(int fd, void *dev_p, } info->r600_has_virtual_memory = true; + /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above + * 16KB makes some SIMDs unoccupied). + * + * LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used. + */ + info->lds_size_per_cu = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024; + assert(util_is_power_of_two_or_zero(dma.available_rings + 1)); assert(util_is_power_of_two_or_zero(compute.available_rings + 1)); @@ -761,6 +768,7 @@ void ac_print_gpu_info(struct radeon_info *info) printf(" tcc_cache_line_size = %u\n", info->tcc_cache_line_size); printf(" tcc_harvested = %u\n", info->tcc_harvested); printf(" pc_lines = %u\n", info->pc_lines); + printf(" lds_size_per_cu = %u\n", info->lds_size_per_cu); printf("CP info:\n"); printf(" gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 08ded09030a..18c1bb16379 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -97,6 +97,7 @@ struct radeon_info { uint32_t tcc_cache_line_size; bool tcc_harvested; unsigned pc_lines; + uint32_t lds_size_per_cu; /* CP info. */ bool gfx_ib_pad_with_type2; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index fa35489ea50..12878cf97b9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1063,13 +1063,7 @@ static void si_calculate_max_simd_waves(struct si_shader *shader) max_simd_waves = MIN2(max_simd_waves, max_vgprs / conf->num_vgprs); } - /* LDS is 64KB per CU (4 SIMDs) on GFX6-9, which is 16KB per SIMD (usage above - * 16KB makes some SIMDs unoccupied). 
- * - * LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used. - */ - unsigned max_lds_size = sscreen->info.chip_class >= GFX10 ? 128*1024 : 64*1024; - unsigned max_lds_per_simd = max_lds_size / 4; + unsigned max_lds_per_simd = sscreen->info.lds_size_per_cu / 4; if (lds_per_wave) max_simd_waves = MIN2(max_simd_waves, max_lds_per_simd / lds_per_wave); |