From e2ea9e11910a10ae10d6491d913ab1d57f97a0d4 Mon Sep 17 00:00:00 2001
From: Timothy Arceri
Date: Fri, 2 Feb 2018 10:09:47 +1100
Subject: radeonsi/nir: add nir support for compiling compute shaders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Marek Olšák
---
 src/gallium/drivers/radeonsi/si_compute.c | 44 ++++++++++++++++++++++---------
 src/gallium/drivers/radeonsi/si_compute.h |  5 +++-
 src/gallium/drivers/radeonsi/si_get.c     |  8 +++---
 3 files changed, 39 insertions(+), 18 deletions(-)

(limited to 'src/gallium')

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 91d6810a916..41927988cf0 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -98,8 +98,19 @@ static void si_create_compute_state_async(void *job, int thread_index)
 	memset(&sel, 0, sizeof(sel));
 
 	sel.screen = program->screen;
-	tgsi_scan_shader(program->tokens, &sel.info);
-	sel.tokens = program->tokens;
+
+	if (program->ir_type == PIPE_SHADER_IR_TGSI) {
+		tgsi_scan_shader(program->ir.tgsi, &sel.info);
+		sel.tokens = program->ir.tgsi;
+	} else {
+		assert(program->ir_type == PIPE_SHADER_IR_NIR);
+		sel.nir = program->ir.nir;
+
+		si_nir_scan_shader(sel.nir, &sel.info);
+		si_lower_nir(&sel);
+	}
+
+
 	sel.type = PIPE_SHADER_COMPUTE;
 	sel.local_size = program->local_size;
 	si_get_active_slot_masks(&sel.info,
@@ -141,7 +152,9 @@ static void si_create_compute_state_async(void *job, int thread_index)
 			sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
 	}
 
-	FREE(program->tokens);
+	if (program->ir_type == PIPE_SHADER_IR_TGSI)
+		FREE(program->ir.tgsi);
+
 	program->shader.selector = NULL;
 }
 
@@ -161,11 +174,16 @@ static void *si_create_compute_state(
 	program->input_size = cso->req_input_mem;
 	program->use_code_object_v2 = cso->ir_type == PIPE_SHADER_IR_NATIVE;
 
-	if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
-		program->tokens = tgsi_dup_tokens(cso->prog);
-		if (!program->tokens) {
-			FREE(program);
-			return NULL;
+	if (cso->ir_type != PIPE_SHADER_IR_NATIVE) {
+		if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
+			program->ir.tgsi = tgsi_dup_tokens(cso->prog);
+			if (!program->ir.tgsi) {
+				FREE(program);
+				return NULL;
+			}
+		} else {
+			assert(cso->ir_type == PIPE_SHADER_IR_NIR);
+			program->ir.nir = (struct nir_shader *) cso->prog;
 		}
 
 		program->compiler_ctx_state.debug = sctx->debug;
@@ -230,7 +248,7 @@ static void si_bind_compute_state(struct pipe_context *ctx, void *state)
 		return;
 
 	/* Wait because we need active slot usage masks. */
-	if (program->ir_type == PIPE_SHADER_IR_TGSI)
+	if (program->ir_type != PIPE_SHADER_IR_NATIVE)
 		util_queue_fence_wait(&program->ready);
 
 	si_set_active_descriptors(sctx,
@@ -379,7 +397,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
 	    sctx->cs_shader_state.offset == offset)
 		return true;
 
-	if (program->ir_type == PIPE_SHADER_IR_TGSI) {
+	if (program->ir_type != PIPE_SHADER_IR_NATIVE) {
 		config = &shader->config;
 	} else {
 		unsigned lds_blocks;
@@ -805,7 +823,7 @@ static void si_launch_grid(
 		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 				 SI_CONTEXT_CS_PARTIAL_FLUSH;
 
-	if (program->ir_type == PIPE_SHADER_IR_TGSI &&
+	if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
 	    program->shader.compilation_failed)
 		return;
 
@@ -870,7 +888,7 @@ static void si_launch_grid(
 					  RADEON_PRIO_COMPUTE_GLOBAL);
 	}
 
-	if (program->ir_type == PIPE_SHADER_IR_TGSI)
+	if (program->ir_type != PIPE_SHADER_IR_NATIVE)
 		si_setup_tgsi_grid(sctx, info);
 
 	si_emit_dispatch_packets(sctx, info);
@@ -891,7 +909,7 @@ static void si_launch_grid(
 
 void si_destroy_compute(struct si_compute *program)
 {
-	if (program->ir_type == PIPE_SHADER_IR_TGSI) {
+	if (program->ir_type != PIPE_SHADER_IR_NATIVE) {
 		util_queue_drop_job(&program->screen->shader_compiler_queue,
 				    &program->ready);
 		util_queue_fence_destroy(&program->ready);
diff --git a/src/gallium/drivers/radeonsi/si_compute.h b/src/gallium/drivers/radeonsi/si_compute.h
index c19b701fc71..ec411588f65 100644
--- a/src/gallium/drivers/radeonsi/si_compute.h
+++ b/src/gallium/drivers/radeonsi/si_compute.h
@@ -33,7 +33,10 @@
 struct si_compute {
 	struct pipe_reference reference;
 	struct si_screen *screen;
-	struct tgsi_token *tokens;
+	union {
+		struct tgsi_token *tgsi;
+		struct nir_shader *nir;
+	} ir;
 	struct util_queue_fence ready;
 	struct si_compiler_ctx_state compiler_ctx_state;
 
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 80023625b72..9a205c07f8e 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -721,7 +721,7 @@ static boolean si_vid_is_format_supported(struct pipe_screen *screen,
 static unsigned get_max_threads_per_block(struct si_screen *screen,
 					  enum pipe_shader_ir ir_type)
 {
-	if (ir_type != PIPE_SHADER_IR_TGSI)
+	if (ir_type == PIPE_SHADER_IR_NATIVE)
 		return 256;
 
 	/* Only 16 waves per thread-group on gfx9. */
@@ -869,10 +869,10 @@ static int si_get_compute_param(struct pipe_screen *screen,
 	case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
 		if (ret) {
 			uint64_t *max_variable_threads_per_block = ret;
-			if (ir_type == PIPE_SHADER_IR_TGSI)
-				*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
-			else
+			if (ir_type == PIPE_SHADER_IR_NATIVE)
 				*max_variable_threads_per_block = 0;
+			else
+				*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
 		}
 		return sizeof(uint64_t);
 	}
-- 
cgit v1.2.3