aboutsummaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
author: Samuel Pitoiset <[email protected]> 2019-07-12 12:17:13 +0200
committer: Samuel Pitoiset <[email protected]> 2019-07-12 17:47:11 +0200
commit: 3f76c0f47c9196556449c0e6e0bc5339028aff2a (patch)
tree: 74889695c1c8a7ef7bde735b0732636cda01a7a1 /src/amd
parent: e631d65fc6155020e9f90eff676bf7f61b234d3a (diff)
radv/gfx10: launch 2 compute waves per CU before going onto the next CU
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/vulkan/radv_pipeline.c 13
1 files changed, 9 insertions, 4 deletions
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index efb94cdcd23..a58b0d6d006 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -4378,6 +4378,7 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
{
struct radv_shader_variant *compute_shader;
struct radv_device *device = pipeline->device;
+ unsigned threads_per_threadgroup;
unsigned threadgroups_per_cu = 1;
unsigned waves_per_threadgroup;
unsigned max_waves_per_sh = 0;
@@ -4402,10 +4403,14 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10));
/* Calculate best compute resource limits. */
- waves_per_threadgroup =
- DIV_ROUND_UP(compute_shader->info.cs.block_size[0] *
- compute_shader->info.cs.block_size[1] *
- compute_shader->info.cs.block_size[2], 64);
+ threads_per_threadgroup = compute_shader->info.cs.block_size[0] *
+ compute_shader->info.cs.block_size[1] *
+ compute_shader->info.cs.block_size[2];
+ waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, 64);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10 &&
+ waves_per_threadgroup == 1)
+ threadgroups_per_cu = 2;
radeon_set_sh_reg(&pipeline->cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
ac_get_compute_resource_limits(&device->physical_device->rad_info,