summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2017-05-31 13:18:53 +0200
committer Marek Olšák <[email protected]> 2017-06-07 18:43:42 +0200
commit 86cc8097266c2bd9d8a6ccc3d7f61391f13119be (patch)
tree 48944a4f56f14aef32e56a73915595b16aab0e49 /src
parent 89b6c93ae3135a44b1aa2ce9285502a3898920bc (diff)
radeonsi: use a compiler queue with a low priority for optimized shaders
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c 31
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 3
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 8
3 files changed, 34 insertions, 8 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 47426b41da6..805392d7132 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -742,11 +742,16 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
return;
util_queue_destroy(&sscreen->shader_compiler_queue);
+ util_queue_destroy(&sscreen->shader_compiler_queue_low_priority);
for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++)
if (sscreen->tm[i])
LLVMDisposeTargetMachine(sscreen->tm[i]);
+ for (i = 0; i < ARRAY_SIZE(sscreen->tm_low_priority); i++)
+ if (sscreen->tm_low_priority[i])
+ LLVMDisposeTargetMachine(sscreen->tm_low_priority[i]);
+
/* Free shader parts. */
for (i = 0; i < ARRAY_SIZE(parts); i++) {
while (parts[i]) {
@@ -860,7 +865,7 @@ static void si_test_vmfault(struct si_screen *sscreen)
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
{
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
- unsigned num_cpus, num_compiler_threads, i;
+ unsigned num_threads, num_compiler_threads, num_compiler_threads_lowprio, i;
if (!sscreen) {
return NULL;
@@ -885,9 +890,11 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
/* Only enable as many threads as we have target machines, but at most
* the number of CPUs - 1 if there is more than one.
*/
- num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
- num_cpus = MAX2(1, num_cpus - 1);
- num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm));
+ num_threads = sysconf(_SC_NPROCESSORS_ONLN);
+ num_threads = MAX2(1, num_threads - 1);
+ num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm));
+ num_compiler_threads_lowprio =
+ MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority));
if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
32, num_compiler_threads, 0)) {
@@ -896,6 +903,20 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
return NULL;
}
+ /* The queue must be large enough so that adding optimized shaders
+ * doesn't stall draw calls when the queue is full. Especially varying
+ * packing generates a very high volume of optimized shader compilation
+ * jobs.
+ */
+ if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
+ "si_shader_low",
+ 1024, num_compiler_threads,
+ UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
+ si_destroy_shader_cache(sscreen);
+ FREE(sscreen);
+ return NULL;
+ }
+
si_handle_env_var_force_family(sscreen);
if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
@@ -959,6 +980,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
for (i = 0; i < num_compiler_threads; i++)
sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
+ for (i = 0; i < num_compiler_threads_lowprio; i++)
+ sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen);
/* Create the auxiliary context. This must be done last. */
sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 13ec0729b19..e917cb1b781 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -110,6 +110,9 @@ struct si_screen {
/* Shader compiler queue for multithreaded compilation. */
struct util_queue shader_compiler_queue;
LLVMTargetMachineRef tm[4]; /* used by the queue only */
+
+ struct util_queue shader_compiler_queue_low_priority;
+ LLVMTargetMachineRef tm_low_priority[4];
};
struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 62bb2212110..5a22add0abc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1450,8 +1450,8 @@ static void si_build_shader_variant(void *job, int thread_index)
int r;
if (thread_index >= 0) {
- assert(thread_index < ARRAY_SIZE(sscreen->tm));
- tm = sscreen->tm[thread_index];
+ assert(thread_index < ARRAY_SIZE(sscreen->tm_low_priority));
+ tm = sscreen->tm_low_priority[thread_index];
if (!debug->async)
debug = NULL;
} else {
@@ -1679,7 +1679,7 @@ again:
!is_pure_monolithic &&
thread_index < 0) {
/* Compile it asynchronously. */
- util_queue_add_job(&sscreen->shader_compiler_queue,
+ util_queue_add_job(&sscreen->shader_compiler_queue_low_priority,
shader, &shader->optimized_ready,
si_build_shader_variant, NULL);
@@ -2258,7 +2258,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
{
if (shader->is_optimized) {
- util_queue_drop_job(&sctx->screen->shader_compiler_queue,
+ util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
&shader->optimized_ready);
util_queue_fence_destroy(&shader->optimized_ready);
}