about | summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2018-06-05 01:20:23 -0400
committer: Marek Olšák <[email protected]> 2018-06-19 12:52:28 -0400
commit: e93fe403bc0e85994d1be59ad3ad5bf65ecbf971 (patch)
tree: 43a67d994bf7c7b3c03e9d1a247e4e21ddd98abb /src/gallium
parent: da0115b1c37e263773fb56dc28fbd22eb0f66811 (diff)
radeonsi: properly compute an LS-HS thread group size limit
"64 / max * 4" is less than "64 * 4 / max".

Tested-by: Dieter Nützel <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 5
1 file changed, 3 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 942cb3c7994..e7f8389caf3 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -146,7 +146,8 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
* resource usage. Also ensures that the number of tcs in and out
* vertices per threadgroup are at most 256.
*/
- *num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4;
+ unsigned max_verts_per_patch = MAX2(num_tcs_input_cp, num_tcs_output_cp);
+ *num_patches = 256 / max_verts_per_patch;
/* Make sure that the data fits in LDS. This assumes the shaders only
* use LDS for the inputs and outputs.
@@ -173,7 +174,7 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
/* SI bug workaround, related to power management. Limit LS-HS
* threadgroups to only one wave.
*/
- unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
+ unsigned one_wave = 64 / max_verts_per_patch;
*num_patches = MIN2(*num_patches, one_wave);
}