diff options
author | Marek Olšák <[email protected]> | 2016-06-11 19:57:40 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-07-05 00:47:13 +0200 |
commit | 5c92c21369ee3b4f52eb5aed183092ba3ee7e079 (patch) | |
tree | d1464436b7410d1169ffd0a8e003db54f8c9e422 /src/gallium/drivers/radeonsi/si_pipe.c | |
parent | 84824935cf28b72bac9f73787aadf20b95dea230 (diff) |
radeonsi: do compilation from si_create_shader_selector asynchronously
Main shader parts and geometry shaders are compiled asynchronously
by util_queue. si_create_shader_selector doesn't wait and returns.
si_draw_vbo(si_shader_select) waits for completion.
This has the best effect when shaders are compiled at app-loading time.
It doesn't help much for shaders compiled on demand, even though
VS+PS compilation should take as much time as the bigger one of the two.
If an app creates more shaders, at most 4 threads will be used to compile
them.
Enabling debug output disables this so that shader stats are printed in the
correct order.
(We could go even further and build variants asynchronously too, then emit
draw calls without waiting and emit incomplete shader states, then force IB
chaining to give the compiler more time, then sync the compilation at the IB
flush and patch the IB with correct shader states. This is great for
compilation before draw calls, but there are some difficulties such as
scratch and tess states requiring the compiler output, and an on-disk shader
cache will likely be a much better and simpler solution.)
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_pipe.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 06b32db43db..ee97bcfaea5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -663,6 +663,13 @@ static void si_destroy_screen(struct pipe_screen* pscreen) if (!sscreen->b.ws->unref(sscreen->b.ws)) return; + if (util_queue_is_initialized(&sscreen->shader_compiler_queue)) + util_queue_destroy(&sscreen->shader_compiler_queue); + + for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++) + if (sscreen->tm[i]) + LLVMDisposeTargetMachine(sscreen->tm[i]); + /* Free shader parts. */ for (i = 0; i < ARRAY_SIZE(parts); i++) { while (parts[i]) { @@ -710,6 +717,7 @@ static bool si_init_gs_info(struct si_screen *sscreen) struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); + unsigned num_cpus, num_compiler_threads, i; if (!sscreen) { return NULL; @@ -754,6 +762,16 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; + /* Only enable as many threads as we have target machines and CPUs. */ + num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm)); + + for (i = 0; i < num_compiler_threads; i++) + sscreen->tm[i] = si_create_llvm_target_machine(sscreen); + + util_queue_init(&sscreen->shader_compiler_queue, "si_shader", + 32, num_compiler_threads); + /* Create the auxiliary context. This must be done last. */ sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL, 0); |