about | summary | refs | log | tree | commit | diff | stats
path: root/src/amd/common
diff options
context:
space:
mode:
author: Samuel Pitoiset <[email protected]> 2019-07-12 12:17:11 +0200
committer: Samuel Pitoiset <[email protected]> 2019-07-12 17:47:11 +0200
commit: e510c5ee3b8c7f0d1d9afff28760469f43c24c02 (patch)
tree: 9935211693651396e3f4e6f589db5690533c8d32 /src/amd/common
parent: 5f4f8aec748427462bb4539dcfbe0e622842327a (diff)
ac: import ac_get_compute_resource_limits() from RadeonSI
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd/common')
-rw-r--r-- src/amd/common/ac_gpu_info.c 32
-rw-r--r-- src/amd/common/ac_gpu_info.h 4
2 files changed, 36 insertions, 0 deletions
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 596a9ebe508..a501d840b25 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -895,3 +895,35 @@ ac_get_harvested_configs(struct radeon_info *info,
}
}
}
+
+unsigned ac_get_compute_resource_limits(struct radeon_info *info,
+ unsigned waves_per_threadgroup,
+ unsigned max_waves_per_sh,
+ unsigned threadgroups_per_cu)
+{ /* Builds and returns a value OR-ed together from S_00B854_* fields, chosen by chip class and the wave/threadgroup arguments. */
+ unsigned compute_resource_limits =
+ S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); /* field is 1 only when the wave count is a multiple of 4 */
+
+ if (info->chip_class >= GFX7) {
+ unsigned num_cu_per_se = info->num_good_compute_units /
+ info->max_se; /* NOTE(review): divides by max_se - assumes it is never 0; confirm against callers */
+
+ /* Force even distribution on all SIMDs in CU if the workgroup
+ * size is 64 (tested below as waves_per_threadgroup == 1). This
+ * has shown some good improvements if # of CUs
+ * per SE is not a multiple of 4.
+ */
+ if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
+ compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
+
+ assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8); /* accepted range is 1..8; the field below stores the count minus one */
+ compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh) |
+ S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1);
+ } else {
+ /* GFX6: the limit is written as max_waves_per_sh / 16, rounded up; when max_waves_per_sh is 0 the field is not set at all. */
+ if (max_waves_per_sh) {
+ unsigned limit_div16 = DIV_ROUND_UP(max_waves_per_sh, 16);
+ compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16);
+ }
+ }
+ return compute_resource_limits;
+}
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index c42548f8352..3ec3e44d665 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -167,6 +167,10 @@ void ac_get_harvested_configs(struct radeon_info *info,
unsigned raster_config,
unsigned *cik_raster_config_1_p,
unsigned *raster_config_se);
+unsigned ac_get_compute_resource_limits(struct radeon_info *info, /* definition reads chip_class, num_good_compute_units and max_se */
+ unsigned waves_per_threadgroup,
+ unsigned max_waves_per_sh, /* on chips older than GFX7, 0 means no wave-limit field is set */
+ unsigned threadgroups_per_cu); /* on GFX7 and newer the definition asserts 1..8 */
static inline unsigned ac_get_max_simd_waves(enum radeon_family family)
{