diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 39 |
4 files changed, 58 insertions, 7 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 06b32db43db..ee97bcfaea5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -663,6 +663,13 @@ static void si_destroy_screen(struct pipe_screen* pscreen) if (!sscreen->b.ws->unref(sscreen->b.ws)) return; + if (util_queue_is_initialized(&sscreen->shader_compiler_queue)) + util_queue_destroy(&sscreen->shader_compiler_queue); + + for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++) + if (sscreen->tm[i]) + LLVMDisposeTargetMachine(sscreen->tm[i]); + /* Free shader parts. */ for (i = 0; i < ARRAY_SIZE(parts); i++) { while (parts[i]) { @@ -710,6 +717,7 @@ static bool si_init_gs_info(struct si_screen *sscreen) struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); + unsigned num_cpus, num_compiler_threads, i; if (!sscreen) { return NULL; @@ -754,6 +762,16 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; + /* Only enable as many threads as we have target machines and CPUs. */ + num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm)); + + for (i = 0; i < num_compiler_threads; i++) + sscreen->tm[i] = si_create_llvm_target_machine(sscreen); + + util_queue_init(&sscreen->shader_compiler_queue, "si_shader", + 32, num_compiler_threads); + /* Create the auxiliary context. This must be done last. 
*/ sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL, 0); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index fc7e73e01a7..1f63c12e5b4 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -27,6 +27,7 @@ #define SI_PIPE_H #include "si_state.h" +#include "util/u_queue.h" #include <llvm-c/TargetMachine.h> @@ -110,6 +111,10 @@ struct si_screen { */ pipe_mutex shader_cache_mutex; struct hash_table *shader_cache; + + /* Shader compiler queue for multithreaded compilation. */ + struct util_queue shader_compiler_queue; + LLVMTargetMachineRef tm[4]; /* used by the queue only */ }; struct si_blend_color { @@ -207,7 +212,7 @@ struct si_context { struct pipe_fence_handle *last_gfx_fence; struct si_shader_ctx_state fixed_func_tcs_shader; - LLVMTargetMachineRef tm; + LLVMTargetMachineRef tm; /* only non-threaded compilation */ bool gfx_flush_in_progress; /* Atoms (direct states). */ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index be75a354279..8fc0a36abbc 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -234,6 +234,7 @@ struct si_shader; */ struct si_shader_selector { struct si_screen *screen; + struct util_queue_fence ready; /* Should only be used by si_init_shader_selector_async * if thread_index == -1 (non-threaded). 
*/ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 799aa5708c1..117cf4be1bc 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -991,7 +991,8 @@ static int si_shader_select_with_key(struct si_screen *sscreen, struct si_shader_ctx_state *state, union si_shader_key *key, LLVMTargetMachineRef tm, - struct pipe_debug_callback *debug) + struct pipe_debug_callback *debug, + bool wait) { struct si_shader_selector *sel = state->cso; struct si_shader *current = state->current; @@ -1005,6 +1006,13 @@ static int si_shader_select_with_key(struct si_screen *sscreen, if (likely(current && memcmp(&current->key, key, sizeof(*key)) == 0)) return 0; + /* This must be done before the mutex is locked, because async GS + * compilation calls this function too, and therefore must enter + * the mutex first. + */ + if (wait) + util_queue_job_wait(&sel->ready); + pipe_mutex_lock(sel->mutex); /* Find the shader variant. */ @@ -1057,7 +1065,7 @@ static int si_shader_select(struct pipe_context *ctx, si_shader_selector_key(ctx, state->cso, &key); return si_shader_select_with_key(sctx->screen, state, &key, - sctx->tm, &sctx->b.debug); + sctx->tm, &sctx->b.debug, true); } static void si_parse_next_shader_property(const struct tgsi_shader_info *info, @@ -1094,10 +1102,19 @@ void si_init_shader_selector_async(void *job, int thread_index) { struct si_shader_selector *sel = (struct si_shader_selector *)job; struct si_screen *sscreen = sel->screen; - LLVMTargetMachineRef tm = sel->tm; - struct pipe_debug_callback *debug = &sel->debug; + LLVMTargetMachineRef tm; + struct pipe_debug_callback *debug; unsigned i; + if (thread_index >= 0) { + assert(thread_index < ARRAY_SIZE(sscreen->tm)); + tm = sscreen->tm[thread_index]; + debug = NULL; + } else { + tm = sel->tm; + debug = &sel->debug; + } + /* Compile the main shader part for use with a prolog and/or epilog. 
* If this fails, the driver will try to compile a monolithic shader * on demand. @@ -1172,7 +1189,8 @@ void si_init_shader_selector_async(void *job, int thread_index) break; } - if (si_shader_select_with_key(sscreen, &state, &key, tm, debug)) + if (si_shader_select_with_key(sscreen, &state, &key, tm, debug, + false)) fprintf(stderr, "radeonsi: can't create a monolithic shader\n"); } } @@ -1304,8 +1322,14 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) | S_02880C_EXEC_ON_NOOP(1); pipe_mutex_init(sel->mutex); + util_queue_fence_init(&sel->ready); - si_init_shader_selector_async(sel, -1); + if (sctx->b.debug.debug_message || + !util_queue_is_initialized(&sscreen->shader_compiler_queue)) + si_init_shader_selector_async(sel, -1); + else + util_queue_add_job(&sscreen->shader_compiler_queue, sel, + &sel->ready, si_init_shader_selector_async); return sel; } @@ -1442,6 +1466,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) [PIPE_SHADER_FRAGMENT] = &sctx->ps_shader, }; + util_queue_job_wait(&sel->ready); + if (current_shader[sel->type]->cso == sel) { current_shader[sel->type]->cso = NULL; current_shader[sel->type]->current = NULL; @@ -1456,6 +1482,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) if (sel->main_shader_part) si_delete_shader(sctx, sel->main_shader_part); + util_queue_fence_destroy(&sel->ready); pipe_mutex_destroy(sel->mutex); free(sel->tokens); free(sel); |