summary | refs | log | tree | commit | diff | stats
path: root/src/amd
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2019-08-28 17:38:50 -0400
committer: Dylan Baker <[email protected]> 2019-09-10 09:51:57 -0700
commit: 481d82b65b20c7c769a5dff6d9a553c6869db978 (patch)
tree: 869922c4ea7a553e9c2eb26d47369ccccbfad731 /src/amd
parent: 732950bf360684602c26a0c6de013c9f7da804b8 (diff)
radeonsi/gfx10: fix wave occupancy computations
Cc: 19.2 <[email protected]>
Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]>
(cherry picked from commit d95afd8b9e7f9b3880813203292257bf0ed7babf)
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/common/ac_gpu_info.h 22
-rw-r--r-- src/amd/vulkan/radv_device.c 2
-rw-r--r-- src/amd/vulkan/radv_shader.c 7
3 files changed, 24 insertions, 7 deletions
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 8418a62e387..a3a187e2245 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -173,7 +173,7 @@ unsigned ac_get_compute_resource_limits(struct radeon_info *info,
unsigned max_waves_per_sh,
unsigned threadgroups_per_cu);
-static inline unsigned ac_get_max_simd_waves(enum radeon_family family)
+static inline unsigned ac_get_max_wave64_per_simd(enum radeon_family family)
{
switch (family) {
@@ -188,10 +188,26 @@ static inline unsigned ac_get_max_simd_waves(enum radeon_family family)
}
}
+static inline unsigned ac_get_num_physical_vgprs(enum chip_class chip_class,
+ unsigned wave_size)
+{
+ /* The number is per SIMD. */
+ if (chip_class >= GFX10)
+ return wave_size == 32 ? 1024 : 512;
+ else
+ return 256;
+}
+
static inline uint32_t
-ac_get_num_physical_sgprs(enum chip_class chip_class)
+ac_get_num_physical_sgprs(const struct radeon_info *info)
{
- return chip_class >= GFX8 ? 800 : 512;
+ /* The number is per SIMD. There is enough SGPRs for the maximum number
+ * of Wave32, which is double the number for Wave64.
+ */
+ if (info->chip_class >= GFX10)
+ return 128 * ac_get_max_wave64_per_simd(info->family) * 2;
+
+ return info->chip_class >= GFX8 ? 800 : 512;
}
#ifdef __cplusplus
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index c02b5c5afb9..3a10117f68c 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1307,7 +1307,7 @@ void radv_GetPhysicalDeviceProperties2(
/* SGPR. */
properties->sgprsPerSimd =
- ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
+ ac_get_num_physical_sgprs(&pdevice->rad_info);
properties->minSgprAllocation =
pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
properties->maxSgprAllocation =
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 41bc73a882f..4a845624c4d 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1343,7 +1343,7 @@ radv_get_max_waves(struct radv_device *device,
unsigned max_simd_waves;
unsigned lds_per_wave = 0;
- max_simd_waves = ac_get_max_simd_waves(device->physical_device->rad_info.family);
+ max_simd_waves = ac_get_max_wave64_per_simd(device->physical_device->rad_info.family);
if (stage == MESA_SHADER_FRAGMENT) {
lds_per_wave = conf->lds_size * lds_increment +
@@ -1359,7 +1359,8 @@ radv_get_max_waves(struct radv_device *device,
if (conf->num_sgprs)
max_simd_waves =
MIN2(max_simd_waves,
- ac_get_num_physical_sgprs(chip_class) / conf->num_sgprs);
+ ac_get_num_physical_sgprs(&device->physical_device->rad_info) /
+ conf->num_sgprs);
if (conf->num_vgprs)
max_simd_waves =
@@ -1456,7 +1457,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
VkShaderStatisticsInfoAMD statistics = {};
statistics.shaderStageMask = shaderStage;
statistics.numPhysicalVgprs = RADV_NUM_PHYSICAL_VGPRS;
- statistics.numPhysicalSgprs = ac_get_num_physical_sgprs(device->physical_device->rad_info.chip_class);
+ statistics.numPhysicalSgprs = ac_get_num_physical_sgprs(&device->physical_device->rad_info);
statistics.numAvailableSgprs = statistics.numPhysicalSgprs;
if (stage == MESA_SHADER_COMPUTE) {