diff options
author | Tom Stellard <[email protected]> | 2013-05-21 17:02:33 -0400 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2013-06-03 10:24:54 -0400 |
commit | d2472ceb92a4104b6ac98c8256a00697074d34af (patch) | |
tree | b1c131b84069885c4db61f85f7360bd90603d20c | |
parent | 3f241903254ca8ce2bb4c73f321b9dec05191314 (diff) |
radeonsi/compute: Support multiple kernels in a compute program
-rw-r--r-- | src/gallium/drivers/radeonsi/radeonsi_compute.c | 27 |
1 files changed, 18 insertions, 9 deletions
diff --git a/src/gallium/drivers/radeonsi/radeonsi_compute.c b/src/gallium/drivers/radeonsi/radeonsi_compute.c index e67d127b223..1ae7d9b37af 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_compute.c +++ b/src/gallium/drivers/radeonsi/radeonsi_compute.c @@ -11,7 +11,8 @@ struct si_pipe_compute { unsigned local_size; unsigned private_size; unsigned input_size; - struct si_pipe_shader shader; + unsigned num_kernels; + struct si_pipe_shader *kernels; unsigned num_user_sgprs; struct si_pm4_state *pm4_buffers; @@ -27,7 +28,7 @@ static void *radeonsi_create_compute_state( CALLOC_STRUCT(si_pipe_compute); const struct pipe_llvm_program_header *header; const unsigned char *code; - LLVMModuleRef mod; + unsigned i; header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); @@ -37,8 +38,15 @@ static void *radeonsi_create_compute_state( program->private_size = cso->req_private_mem; program->input_size = cso->req_input_mem; - mod = radeon_llvm_parse_bitcode(code, header->num_bytes); - si_compile_llvm(rctx, &program->shader, mod); + program->num_kernels = radeon_llvm_get_num_kernels(code, + header->num_bytes); + program->kernels = CALLOC(sizeof(struct si_pipe_shader), + program->num_kernels); + for (i = 0; i < program->num_kernels; i++) { + LLVMModuleRef mod = radeon_llvm_get_kernel_module(i, code, + header->num_bytes); + si_compile_llvm(rctx, &program->kernels[i], mod); + } return program; } @@ -88,6 +96,7 @@ static void radeonsi_launch_grid( uint64_t shader_va; unsigned arg_user_sgpr_count; unsigned i; + struct si_pipe_shader *shader = &program->kernels[pc]; pm4->compute_pkt = true; si_cmd_context_control(pm4); @@ -133,8 +142,8 @@ static void radeonsi_launch_grid( * (number of compute units) * 4 * (waves per simd) - 1 */ si_pm4_set_reg(pm4, R_00B82C_COMPUTE_MAX_WAVE_ID, 0x190 /* Default value */); - shader_va = r600_resource_va(ctx->screen, (void *)program->shader.bo); - si_pm4_add_bo(pm4, program->shader.bo, RADEON_USAGE_READ); + shader_va = r600_resource_va(ctx->screen, (void *)shader->bo); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff); si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); @@ -143,13 +152,13 @@ static void radeonsi_launch_grid( * TIDIG_COMP_CNT. * XXX: The compiler should account for this. */ - S_00B848_VGPRS((MAX2(3, program->shader.num_vgprs) - 1) / 4) + S_00B848_VGPRS((MAX2(3, shader->num_vgprs) - 1) / 4) /* We always use at least 4 + arg_user_sgpr_count. The 4 extra * sgprs are from TGID_X_EN, TGID_Y_EN, TGID_Z_EN, TG_SIZE_EN * XXX: The compiler should account for this. */ | S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count, - program->shader.num_sgprs)) - 1) / 8)) + shader->num_sgprs)) - 1) / 8)) ; si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, @@ -201,7 +210,7 @@ static void radeonsi_launch_grid( #endif rctx->ws->cs_flush(rctx->cs, RADEON_FLUSH_COMPUTE, 0); - rctx->ws->buffer_wait(program->shader.bo->buf, 0); + rctx->ws->buffer_wait(shader->bo->buf, 0); FREE(pm4); } |