summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2019-07-03 22:56:58 -0400
committer: Marek Olšák <[email protected]> 2019-07-09 17:24:16 -0400
commit: 270a8ab6481f584be791ee1a827c12a536070d52 (patch)
tree: 1b08b936f4cabcdb1353d6c895b504057d8a3516
parent: ab1f36a1d3949542c73f8643f4d68a84fd453f83 (diff)
radeonsi/gfx10: launch 2 compute waves per CU before going onto the next CU
Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Acked-by: Dave Airlie <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/si_compute.c 11
1 file changed, 9 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 6c3509b5226..0989181aba4 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -810,12 +810,19 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
struct si_screen *sscreen = sctx->screen;
struct radeon_cmdbuf *cs = sctx->gfx_cs;
bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off;
+ unsigned threads_per_threadgroup =
+ info->block[0] * info->block[1] * info->block[2];
unsigned waves_per_threadgroup =
- DIV_ROUND_UP(info->block[0] * info->block[1] * info->block[2], 64);
+ DIV_ROUND_UP(threads_per_threadgroup, 64);
+ unsigned threadgroups_per_cu = 1;
+
+ if (sctx->chip_class >= GFX10 && waves_per_threadgroup == 1)
+ threadgroups_per_cu = 2;
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
si_get_compute_resource_limits(sscreen, waves_per_threadgroup,
- sctx->cs_max_waves_per_sh, 1));
+ sctx->cs_max_waves_per_sh,
+ threadgroups_per_cu));
unsigned dispatch_initiator =
S_00B800_COMPUTE_SHADER_EN(1) |