summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2019-01-22 17:18:01 -0500
committer: Marek Olšák <[email protected]> 2019-05-16 13:06:54 -0400
commit: 9624855f13c20f28c198c4c5c53e78aff3492a6d (patch)
tree: 51eb684cb5bc575a784ea753a61b362ebae85ba9
parent: 6e38af063169258b99b7609953138ce81f4d5f98 (diff)
radeonsi: add threadgroups_per_cu param into si_get_compute_resource_limits
Tested-by: Dieter Nützel <[email protected]>
Acked-by: Nicolai Hähnle <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/si_compute.c 9
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 3
2 files changed, 8 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 2d70f0759bb..f92ab591036 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -751,7 +751,8 @@ static void si_setup_tgsi_user_data(struct si_context *sctx,
unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
unsigned waves_per_threadgroup,
- unsigned max_waves_per_sh)
+ unsigned max_waves_per_sh,
+ unsigned threadgroups_per_cu)
{
unsigned compute_resource_limits =
S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
@@ -767,7 +768,9 @@ unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
- compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh);
+ assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8);
+ compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh) |
+ S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1);
} else {
/* GFX6 */
if (max_waves_per_sh) {
@@ -789,7 +792,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
si_get_compute_resource_limits(sscreen, waves_per_threadgroup,
- sctx->cs_max_waves_per_sh));
+ sctx->cs_max_waves_per_sh, 1));
unsigned dispatch_initiator =
S_00B800_COMPUTE_SHADER_EN(1) |
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index f65f4f50043..4606140f53d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1307,7 +1307,8 @@ unsigned si_end_counter(struct si_screen *sscreen, unsigned type,
void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs);
unsigned si_get_compute_resource_limits(struct si_screen *sscreen,
unsigned waves_per_threadgroup,
- unsigned max_waves_per_sh);
+ unsigned max_waves_per_sh,
+ unsigned threadgroups_per_cu);
void si_init_compute_functions(struct si_context *sctx);
/* si_perfcounters.c */