summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2018-04-13 18:09:11 -0400
committer Marek Olšák <[email protected]> 2018-04-27 17:56:04 -0400
commit 8b7358fe4376aecee0c29ea622f88f9ef07e6b11 (patch)
tree 66acf1465d0cf3948a4ff513c45a948317e8e6f0 /src/gallium
parent 3f0eaaf6d96bf627b6594ea2045ba1c0268dbac2 (diff)
radeonsi: increase the number of compiler threads depending on the CPU
The compiler queue was limited to 3 threads, so shader-db running on a 16-thread CPU would have a bottleneck on the 3-thread queue.

Reviewed-by: Timothy Arceri <[email protected]>
Tested-by: Benedikt Schemmer <ben at besd.de>
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c 39
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 6
2 files changed, 29 insertions, 16 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index c7f1e27f6ba..1ca38ed55cb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -832,7 +832,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
const struct pipe_screen_config *config)
{
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
- unsigned num_threads, num_compiler_threads, num_compiler_threads_lowprio, i;
+ unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i;
if (!sscreen) {
return NULL;
@@ -889,17 +889,30 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
si_disk_cache_create(sscreen);
- /* Only enable as many threads as we have target machines, but at most
- * the number of CPUs - 1 if there is more than one.
- */
- num_threads = sysconf(_SC_NPROCESSORS_ONLN);
- num_threads = MAX2(1, num_threads - 1);
- num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->compiler));
- num_compiler_threads_lowprio =
- MIN2(num_threads, ARRAY_SIZE(sscreen->compiler_lowp));
+ /* Determine the number of shader compiler threads. */
+ hw_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+ if (hw_threads >= 12) {
+ num_comp_hi_threads = hw_threads * 3 / 4;
+ num_comp_lo_threads = hw_threads / 3;
+ } else if (hw_threads >= 6) {
+ num_comp_hi_threads = hw_threads - 2;
+ num_comp_lo_threads = hw_threads / 2;
+ } else if (hw_threads >= 2) {
+ num_comp_hi_threads = hw_threads - 1;
+ num_comp_lo_threads = hw_threads / 2;
+ } else {
+ num_comp_hi_threads = 1;
+ num_comp_lo_threads = 1;
+ }
+
+ num_comp_hi_threads = MIN2(num_comp_hi_threads,
+ ARRAY_SIZE(sscreen->compiler));
+ num_comp_lo_threads = MIN2(num_comp_lo_threads,
+ ARRAY_SIZE(sscreen->compiler_lowp));
if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
- 32, num_compiler_threads,
+ 64, num_comp_hi_threads,
UTIL_QUEUE_INIT_RESIZE_IF_FULL)) {
si_destroy_shader_cache(sscreen);
FREE(sscreen);
@@ -908,7 +921,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
"si_shader_low",
- 32, num_compiler_threads_lowprio,
+ 64, num_comp_lo_threads,
UTIL_QUEUE_INIT_RESIZE_IF_FULL |
UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
si_destroy_shader_cache(sscreen);
@@ -1059,9 +1072,9 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
sscreen->debug_flags |= DBG_ALL_SHADERS;
- for (i = 0; i < num_compiler_threads; i++)
+ for (i = 0; i < num_comp_hi_threads; i++)
si_init_compiler(sscreen, &sscreen->compiler[i]);
- for (i = 0; i < num_compiler_threads_lowprio; i++)
+ for (i = 0; i < num_comp_lo_threads; i++)
si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
/* Create the auxiliary context. This must be done last. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index a67786c84d9..27efc5099f0 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -529,13 +529,13 @@ struct si_screen {
struct util_queue shader_compiler_queue;
/* Use at most 3 normal compiler threads on quadcore and better.
* Hyperthreaded CPUs report the number of threads, but we want
- * the number of cores. */
- struct si_compiler compiler[3]; /* used by the queue only */
+ * the number of cores. We only need this many threads for shader-db. */
+ struct si_compiler compiler[24]; /* used by the queue only */
struct util_queue shader_compiler_queue_low_priority;
/* Use at most 2 low priority threads on quadcore and better.
* We want to minimize the impact on multithreaded Mesa. */
- struct si_compiler compiler_lowp[2]; /* at most 2 threads */
+ struct si_compiler compiler_lowp[10];
};
struct si_blend_color {