diff options
author | Tom Stellard <[email protected]> | 2012-09-13 14:59:50 +0000 |
---|---|---|
committer | Tom Stellard <[email protected]> | 2012-09-25 14:36:46 +0000 |
commit | b57eba365496307c35373f5c303996731b994f25 (patch) | |
tree | 949e648c7e6b5eeeb8815689de1ca9910a513189 /src/gallium/drivers/r600 | |
parent | e59505e34bdea772bb439cb42f2ef20ec495de03 (diff) |
r600g: Handle multiple kernels in the same program v2
v2:
- Use pc parameter of launch_grid
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 44 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute_internal.h | 18 | ||||
-rw-r--r-- | src/gallium/drivers/r600/llvm_wrapper.cpp | 38 | ||||
-rw-r--r-- | src/gallium/drivers/r600/llvm_wrapper.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 1 |
5 files changed, 84 insertions, 21 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index c7b9d3314ac..b7c734512e2 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -123,11 +123,11 @@ void *evergreen_create_compute_state( { struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); - void *p; #ifdef HAVE_OPENCL const struct pipe_llvm_program_header * header; const unsigned char * code; + unsigned i; COMPUTE_DBG("*** evergreen_create_compute_state\n"); @@ -144,18 +144,15 @@ void *evergreen_create_compute_state( shader->input_size = cso->req_input_mem; #ifdef HAVE_OPENCL - shader->mod = llvm_parse_bitcode(code, header->num_bytes); + shader->num_kernels = llvm_get_num_kernels(code, header->num_bytes); + shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels); - r600_compute_shader_create(ctx_, shader->mod, &shader->bc); + for (i = 0; i < shader->num_kernels; i++) { + struct r600_kernel *kernel = &shader->kernels[i]; + kernel->llvm_module = llvm_get_kernel_module(i, code, + header->num_bytes); + } #endif - shader->shader_code_bo = r600_compute_buffer_alloc_vram(ctx->screen, - shader->bc.ndw * 4); - - p = ctx->ws->buffer_map(shader->shader_code_bo->cs_buf, ctx->cs, - PIPE_TRANSFER_WRITE); - - memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); - ctx->ws->buffer_unmap(shader->shader_code_bo->cs_buf); return shader; } @@ -456,20 +453,21 @@ void evergreen_emit_cs_shader( struct r600_cs_shader_state *state = (struct r600_cs_shader_state*)atom; struct r600_pipe_compute *shader = state->shader; + struct r600_kernel *kernel = &shader->kernels[state->kernel_index]; struct radeon_winsys_cs *cs = rctx->cs; uint64_t va; - va = r600_resource_va(&rctx->screen->screen, &shader->shader_code_bo->b.b); + va = r600_resource_va(&rctx->screen->screen, &kernel->code_bo->b.b); r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3); r600_write_value(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */ r600_write_value(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */ - S_0288D4_NUM_GPRS(shader->bc.ngpr) - | S_0288D4_STACK_SIZE(shader->bc.nstack)); + S_0288D4_NUM_GPRS(kernel->bc.ngpr) + | S_0288D4_STACK_SIZE(kernel->bc.nstack)); r600_write_value(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */ r600_write_value(cs, PKT3C(PKT3_NOP, 0, 0)); - r600_write_value(cs, r600_context_bo_reloc(rctx, shader->shader_code_bo, + r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo, RADEON_USAGE_READ)); rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; @@ -481,9 +479,25 @@ static void evergreen_launch_grid( uint32_t pc, const void *input) { struct r600_context *ctx = (struct r600_context *)ctx_; + struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; + void *p; COMPUTE_DBG("*** evergreen_launch_grid: pc = %u\n", pc); +#ifdef HAVE_OPENCL + if (!shader->kernels[pc].code_bo) { + struct r600_kernel *kernel = &shader->kernels[pc]; + r600_compute_shader_create(ctx_, kernel->llvm_module, &kernel->bc); + kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, + kernel->bc.ndw * 4); + p = ctx->ws->buffer_map(kernel->code_bo->cs_buf, ctx->cs, + PIPE_TRANSFER_WRITE); + memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4); + ctx->ws->buffer_unmap(kernel->code_bo->cs_buf); + } +#endif + + ctx->cs_shader_state.kernel_index = pc; evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); compute_emit_cs(ctx, block_layout, grid_layout); } diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h index 2bef261ebb5..328ce262a7a 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.h +++ b/src/gallium/drivers/r600/evergreen_compute_internal.h @@ -26,6 +26,7 @@ #define EVERGREEN_COMPUTE_INTERNAL_H #include "compute_memory_pool.h" +#include "r600_asm.h" enum evergreen_compute_resources { @@ -67,21 +68,26 @@ struct number_type_and_format { unsigned num_format_all; }; +struct r600_kernel { + unsigned count; +#ifdef HAVE_OPENCL + LLVMModuleRef llvm_module; +#endif + struct r600_resource *code_bo; + struct r600_bytecode bc; +}; + struct r600_pipe_compute { struct r600_context *ctx; - struct r600_bytecode bc; - struct tgsi_token *tokens; + unsigned num_kernels; + struct r600_kernel *kernels; struct evergreen_compute_resource *resources; unsigned local_size; unsigned private_size; unsigned input_size; -#ifdef HAVE_OPENCL - LLVMModuleRef mod; -#endif struct r600_resource *kernel_param; - struct r600_resource *shader_code_bo; }; int evergreen_compute_get_gpu_format(struct number_type_and_format* fmt, struct r600_resource *bo); ///get hw format from resource, return 0 on faliure, nonzero on success diff --git a/src/gallium/drivers/r600/llvm_wrapper.cpp b/src/gallium/drivers/r600/llvm_wrapper.cpp index 174fb013c83..81f53974d11 100644 --- a/src/gallium/drivers/r600/llvm_wrapper.cpp +++ b/src/gallium/drivers/r600/llvm_wrapper.cpp @@ -1,9 +1,11 @@ #include <llvm/ADT/OwningPtr.h> #include <llvm/ADT/StringRef.h> #include <llvm/LLVMContext.h> +#include <llvm/PassManager.h> #include <llvm/Support/IRReader.h> #include <llvm/Support/MemoryBuffer.h> #include <llvm/Support/SourceMgr.h> +#include <llvm/Transforms/IPO.h> #include "llvm_wrapper.h" @@ -17,3 +19,39 @@ extern "C" LLVMModuleRef llvm_parse_bitcode(const unsigned char * bitcode, unsig M.reset(llvm::ParseIR(buffer, Err, llvm::getGlobalContext())); return wrap(M.take()); } + +extern "C" void llvm_strip_unused_kernels(LLVMModuleRef mod, const char *kernel_name) +{ + llvm::Module *M = llvm::unwrap(mod); + std::vector<const char *> export_list; + export_list.push_back(kernel_name); + llvm::PassManager PM; + PM.add(llvm::createInternalizePass(export_list)); + PM.add(llvm::createGlobalDCEPass()); + PM.run(*M); +} + +extern "C" unsigned llvm_get_num_kernels(const unsigned char *bitcode, + unsigned bitcode_len) +{ + LLVMModuleRef mod = llvm_parse_bitcode(bitcode, bitcode_len); + llvm::Module *M = llvm::unwrap(mod); + const llvm::NamedMDNode *kernel_node + = M->getNamedMetadata("opencl.kernels"); + unsigned kernel_count = kernel_node->getNumOperands(); + delete M; + return kernel_count; +} + +extern "C" LLVMModuleRef llvm_get_kernel_module(unsigned index, + const unsigned char *bitcode, unsigned bitcode_len) +{ + LLVMModuleRef mod = llvm_parse_bitcode(bitcode, bitcode_len); + llvm::Module *M = llvm::unwrap(mod); + const llvm::NamedMDNode *kernel_node = + M->getNamedMetadata("opencl.kernels"); + const char* kernel_name = kernel_node->getOperand(index)-> + getOperand(0)->getName().data(); + llvm_strip_unused_kernels(mod, kernel_name); + return mod; +} diff --git a/src/gallium/drivers/r600/llvm_wrapper.h b/src/gallium/drivers/r600/llvm_wrapper.h index 3a696455cdf..4b9b93f23c6 100644 --- a/src/gallium/drivers/r600/llvm_wrapper.h +++ b/src/gallium/drivers/r600/llvm_wrapper.h @@ -8,6 +8,10 @@ extern "C" { #endif LLVMModuleRef llvm_parse_bitcode(const unsigned char * bitcode, unsigned bitcode_len); +void llvm_strip_unused_kernels(LLVMModuleRef mod, const char *kernel_name); +unsigned llvm_get_num_kernels(const unsigned char *bitcode, unsigned bitcode_len); +LLVMModuleRef llvm_get_kernel_module(unsigned index, + const unsigned char *bitcode, unsigned bitcode_len); #ifdef __cplusplus } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 979cb438938..68800609979 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -123,6 +123,7 @@ struct r600_clip_state { struct r600_cs_shader_state { struct r600_atom atom; + unsigned kernel_index; struct r600_pipe_compute *shader; }; |