diff options
author | Marek Olšák <[email protected]> | 2018-04-13 18:09:11 -0400 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2018-04-27 17:56:04 -0400 |
commit | 8b7358fe4376aecee0c29ea622f88f9ef07e6b11 (patch) | |
tree | 66acf1465d0cf3948a4ff513c45a948317e8e6f0 | |
parent | 3f0eaaf6d96bf627b6594ea2045ba1c0268dbac2 (diff) |
radeonsi: increase the number of compiler threads depending on the CPU

The compiler queue was limited to 3 threads, so shader-db running
on a 16-thread CPU would have a bottleneck on the 3-thread queue.

Reviewed-by: Timothy Arceri <[email protected]>
Tested-by: Benedikt Schemmer <ben at besd.de>
Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 39 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 6 |
2 files changed, 29 insertions, 16 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index c7f1e27f6ba..1ca38ed55cb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -832,7 +832,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 		const struct pipe_screen_config *config)
 {
 	struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
-	unsigned num_threads, num_compiler_threads, num_compiler_threads_lowprio, i;
+	unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i;
 
 	if (!sscreen) {
 		return NULL;
@@ -889,17 +889,30 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 
 	si_disk_cache_create(sscreen);
 
-	/* Only enable as many threads as we have target machines, but at most
-	 * the number of CPUs - 1 if there is more than one.
-	 */
-	num_threads = sysconf(_SC_NPROCESSORS_ONLN);
-	num_threads = MAX2(1, num_threads - 1);
-	num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->compiler));
-	num_compiler_threads_lowprio =
-		MIN2(num_threads, ARRAY_SIZE(sscreen->compiler_lowp));
+	/* Determine the number of shader compiler threads. */
+	hw_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+	if (hw_threads >= 12) {
+		num_comp_hi_threads = hw_threads * 3 / 4;
+		num_comp_lo_threads = hw_threads / 3;
+	} else if (hw_threads >= 6) {
+		num_comp_hi_threads = hw_threads - 2;
+		num_comp_lo_threads = hw_threads / 2;
+	} else if (hw_threads >= 2) {
+		num_comp_hi_threads = hw_threads - 1;
+		num_comp_lo_threads = hw_threads / 2;
+	} else {
+		num_comp_hi_threads = 1;
+		num_comp_lo_threads = 1;
+	}
+
+	num_comp_hi_threads = MIN2(num_comp_hi_threads,
+		ARRAY_SIZE(sscreen->compiler));
+	num_comp_lo_threads = MIN2(num_comp_lo_threads,
+		ARRAY_SIZE(sscreen->compiler_lowp));
 
 	if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
-		32, num_compiler_threads,
+		64, num_comp_hi_threads,
 		UTIL_QUEUE_INIT_RESIZE_IF_FULL)) {
 		si_destroy_shader_cache(sscreen);
 		FREE(sscreen);
@@ -908,7 +921,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 
 	if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
 		"si_shader_low",
-		32, num_compiler_threads_lowprio,
+		64, num_comp_lo_threads,
 		UTIL_QUEUE_INIT_RESIZE_IF_FULL |
 		UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
 		si_destroy_shader_cache(sscreen);
@@ -1059,9 +1072,9 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 	if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
 		sscreen->debug_flags |= DBG_ALL_SHADERS;
 
-	for (i = 0; i < num_compiler_threads; i++)
+	for (i = 0; i < num_comp_hi_threads; i++)
 		si_init_compiler(sscreen, &sscreen->compiler[i]);
-	for (i = 0; i < num_compiler_threads_lowprio; i++)
+	for (i = 0; i < num_comp_lo_threads; i++)
 		si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
 
 	/* Create the auxiliary context. This must be done last. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index a67786c84d9..27efc5099f0 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -529,13 +529,13 @@ struct si_screen {
 	struct util_queue shader_compiler_queue;
 	/* Use at most 3 normal compiler threads on quadcore and better.
 	 * Hyperthreaded CPUs report the number of threads, but we want
-	 * the number of cores. */
-	struct si_compiler compiler[3]; /* used by the queue only */
+	 * the number of cores. We only need this many threads for shader-db. */
+	struct si_compiler compiler[24]; /* used by the queue only */
 
 	struct util_queue shader_compiler_queue_low_priority;
 	/* Use at most 2 low priority threads on quadcore and better.
 	 * We want to minimize the impact on multithreaded Mesa. */
-	struct si_compiler compiler_lowp[2]; /* at most 2 threads */
+	struct si_compiler compiler_lowp[10];
 };
 
 struct si_blend_color {