aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Nicolai Hähnle <[email protected]> 2017-10-22 17:38:43 +0200
committer: Nicolai Hähnle <[email protected]> 2017-11-09 11:53:20 +0100
commit: f76a6cb337f979fb26d3233d7e0ff208ec8885bb (patch)
tree: 21091663e2eaafc21711a7b77b91c3984f83d490
parent: b650fc09c3a35ca624aad5fe4b5c34867708f116 (diff)
radeonsi: always use async compiles when creating shader/compute states
With Gallium threaded contexts, creating shader/compute states is effectively a screen operation, so we should not use context state. In particular, this allows us to avoid using the context's LLVM TargetMachine.

This isn't an issue yet because u_threaded_context filters out non-async debug callbacks, and we disable threaded contexts for debug contexts. However, we may want to change that in the future.

Reviewed-by: Marek Olšák <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/si_compute.c | 42
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c | 42
2 files changed, 50 insertions, 34 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index e55988af4cc..3eee907d44b 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -23,6 +23,7 @@
*/
#include "tgsi/tgsi_parse.h"
+#include "util/u_async_debug.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
@@ -84,14 +85,10 @@ static void si_create_compute_state_async(void *job, int thread_index)
LLVMTargetMachineRef tm;
struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
- if (thread_index >= 0) {
- assert(thread_index < ARRAY_SIZE(program->screen->tm));
- tm = program->screen->tm[thread_index];
- if (!debug->async)
- debug = NULL;
- } else {
- tm = program->compiler_ctx_state.tm;
- }
+ assert(!debug->debug_message || debug->async);
+ assert(thread_index >= 0);
+ assert(thread_index < ARRAY_SIZE(program->screen->tm));
+ tm = program->screen->tm[thread_index];
memset(&sel, 0, sizeof(sel));
@@ -167,20 +164,31 @@ static void *si_create_compute_state(
return NULL;
}
- program->compiler_ctx_state.tm = sctx->tm;
program->compiler_ctx_state.debug = sctx->debug;
program->compiler_ctx_state.is_debug_context = sctx->is_debug;
p_atomic_inc(&sscreen->b.num_shaders_created);
util_queue_fence_init(&program->ready);
- if ((sctx->debug.debug_message && !sctx->debug.async) ||
- sctx->is_debug ||
- si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
- si_create_compute_state_async(program, -1);
- else
- util_queue_add_job(&sscreen->shader_compiler_queue,
- program, &program->ready,
- si_create_compute_state_async, NULL);
+ struct util_async_debug_callback async_debug;
+ bool wait =
+ (sctx->debug.debug_message && !sctx->debug.async) ||
+ sctx->is_debug ||
+ si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE);
+
+ if (wait) {
+ u_async_debug_init(&async_debug);
+ program->compiler_ctx_state.debug = async_debug.base;
+ }
+
+ util_queue_add_job(&sscreen->shader_compiler_queue,
+ program, &program->ready,
+ si_create_compute_state_async, NULL);
+
+ if (wait) {
+ util_queue_fence_wait(&program->ready);
+ u_async_debug_drain(&async_debug, &sctx->debug);
+ u_async_debug_cleanup(&async_debug);
+ }
} else {
const struct pipe_llvm_program_header *header;
const char *code;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 757e889c3b7..3edc340f01f 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -30,6 +30,7 @@
#include "tgsi/tgsi_ureg.h"
#include "util/hash_table.h"
#include "util/crc32.h"
+#include "util/u_async_debug.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
@@ -1840,14 +1841,10 @@ static void si_init_shader_selector_async(void *job, int thread_index)
struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
unsigned i;
- if (thread_index >= 0) {
- assert(thread_index < ARRAY_SIZE(sscreen->tm));
- tm = sscreen->tm[thread_index];
- if (!debug->async)
- debug = NULL;
- } else {
- tm = sel->compiler_ctx_state.tm;
- }
+ assert(!debug->debug_message || debug->async);
+ assert(thread_index >= 0);
+ assert(thread_index < ARRAY_SIZE(sscreen->tm));
+ tm = sscreen->tm[thread_index];
/* Compile the main shader part for use with a prolog and/or epilog.
* If this fails, the driver will try to compile a monolithic shader
@@ -2042,7 +2039,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
pipe_reference_init(&sel->reference, 1);
sel->screen = sscreen;
- sel->compiler_ctx_state.tm = sctx->tm;
sel->compiler_ctx_state.debug = sctx->debug;
sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
@@ -2272,14 +2268,26 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
(void) mtx_init(&sel->mutex, mtx_plain);
util_queue_fence_init(&sel->ready);
- if ((sctx->debug.debug_message && !sctx->debug.async) ||
- sctx->is_debug ||
- si_can_dump_shader(&sscreen->b, sel->info.processor))
- si_init_shader_selector_async(sel, -1);
- else
- util_queue_add_job(&sscreen->shader_compiler_queue, sel,
- &sel->ready, si_init_shader_selector_async,
- NULL);
+ struct util_async_debug_callback async_debug;
+ bool wait =
+ (sctx->debug.debug_message && !sctx->debug.async) ||
+ sctx->is_debug ||
+ si_can_dump_shader(&sscreen->b, sel->info.processor);
+
+ if (wait) {
+ u_async_debug_init(&async_debug);
+ sel->compiler_ctx_state.debug = async_debug.base;
+ }
+
+ util_queue_add_job(&sscreen->shader_compiler_queue, sel,
+ &sel->ready, si_init_shader_selector_async,
+ NULL);
+
+ if (wait) {
+ util_queue_fence_wait(&sel->ready);
+ u_async_debug_drain(&async_debug, &sctx->debug);
+ u_async_debug_cleanup(&async_debug);
+ }
return sel;
}