diff options
Diffstat (limited to 'src/gallium/drivers/r600/evergreen_compute.c')
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 167 |
1 files changed, 101 insertions, 66 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 38b78c7dfcb..7a17d1ee089 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -49,6 +49,7 @@ #ifdef HAVE_OPENCL #include "radeon_llvm_util.h" #endif +#include "radeon_elf_util.h" #include <inttypes.h> /** @@ -198,18 +199,42 @@ void *evergreen_create_compute_state( { struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); - -#ifdef HAVE_OPENCL const struct pipe_llvm_program_header * header; - const unsigned char * code; - unsigned i; - - shader->llvm_ctx = LLVMContextCreate(); + const char *code; + void *p; + boolean use_kill; COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n"); - header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); +#if HAVE_LLVM < 0x0306 +#ifdef HAVE_OPENCL + (void)use_kill; + (void)p; + shader->llvm_ctx = LLVMContextCreate(); + shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, + code, header->num_bytes); + shader->kernels = CALLOC(sizeof(struct r600_kernel), + shader->num_kernels); + { + unsigned i; + for (i = 0; i < shader->num_kernels; i++) { + struct r600_kernel *kernel = &shader->kernels[i]; + kernel->llvm_module = radeon_llvm_get_kernel_module( + shader->llvm_ctx, i, code, header->num_bytes); + } + } +#endif +#else + memset(&shader->binary, 0, sizeof(shader->binary)); + radeon_elf_read(code, header->num_bytes, &shader->binary, true); + r600_create_shader(&shader->bc, &shader->binary, &use_kill); + + shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, + shader->bc.ndw * 4); + p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); + memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); + ctx->b.ws->buffer_unmap(shader->code_bo->cs_buf); #endif shader->ctx = (struct r600_context*)ctx; @@ -217,17 +242,6 @@ void *evergreen_create_compute_state( shader->private_size = cso->req_private_mem; shader->input_size = cso->req_input_mem; -#ifdef HAVE_OPENCL - shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code, - header->num_bytes); - shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels); - - for (i = 0; i < shader->num_kernels; i++) { - struct r600_kernel *kernel = &shader->kernels[i]; - kernel->llvm_module = radeon_llvm_get_kernel_module(shader->llvm_ctx, i, - code, header->num_bytes); - } -#endif return shader; } @@ -238,14 +252,6 @@ void evergreen_delete_compute_state(struct pipe_context *ctx, void* state) if (!shader) return; - FREE(shader->kernels); - -#ifdef HAVE_OPENCL - if (shader->llvm_ctx){ - LLVMContextDispose(shader->llvm_ctx); - } -#endif - FREE(shader); } @@ -347,7 +353,13 @@ static void evergreen_emit_direct_dispatch( unsigned wave_divisor = (16 * num_pipes); int group_size = 1; int grid_size = 1; - unsigned lds_size = shader->local_size / 4 + shader->active_kernel->bc.nlds_dw; + unsigned lds_size = shader->local_size / 4 + +#if HAVE_LLVM < 0x0306 + shader->active_kernel->bc.nlds_dw; +#else + shader->bc.nlds_dw; +#endif + /* Calculate group_size/grid_size */ for (i = 0; i < 3; i++) { @@ -520,19 +532,34 @@ void evergreen_emit_cs_shader( struct r600_cs_shader_state *state = (struct r600_cs_shader_state*)atom; struct r600_pipe_compute *shader = state->shader; - struct r600_kernel *kernel = &shader->kernels[state->kernel_index]; struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + uint64_t va; + struct r600_resource *code_bo; + unsigned ngpr, nstack; + +#if HAVE_LLVM < 0x0306 + struct r600_kernel *kernel = &shader->kernels[state->kernel_index]; + code_bo = kernel->code_bo; + va = kernel->code_bo->gpu_address; + ngpr = kernel->bc.ngpr; + nstack = kernel->bc.nstack; +#else + code_bo = shader->code_bo; + va = shader->code_bo->gpu_address + state->pc; + ngpr = shader->bc.ngpr; + nstack = shader->bc.nstack; +#endif r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3); - radeon_emit(cs, kernel->code_bo->gpu_address >> 8); /* R_0288D0_SQ_PGM_START_LS */ + radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */ radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */ - S_0288D4_NUM_GPRS(kernel->bc.ngpr) - | S_0288D4_STACK_SIZE(kernel->bc.nstack)); + S_0288D4_NUM_GPRS(ngpr) + | S_0288D4_STACK_SIZE(nstack)); radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */ radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0)); radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, - kernel->code_bo, RADEON_USAGE_READ, + code_bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); } @@ -542,46 +569,54 @@ static void evergreen_launch_grid( uint32_t pc, const void *input) { struct r600_context *ctx = (struct r600_context *)ctx_; - struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; - struct r600_kernel *kernel = &shader->kernels[pc]; - - COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc); + boolean use_kill; +#if HAVE_LLVM < 0x0306 #ifdef HAVE_OPENCL - - if (!kernel->code_bo) { - void *p; - struct r600_bytecode *bc = &kernel->bc; - LLVMModuleRef mod = kernel->llvm_module; - boolean use_kill = false; - bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0; - unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS; - unsigned sb_disasm = use_sb || - (ctx->screen->b.debug_flags & DBG_SB_DISASM); - - r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family, - ctx->screen->has_compressed_msaa_texturing); - bc->type = TGSI_PROCESSOR_COMPUTE; - bc->isa = ctx->isa; - r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump); - - if (dump && !sb_disasm) { - r600_bytecode_disasm(bc); - } else if ((dump && sb_disasm) || use_sb) { - if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb)) - R600_ERR("r600_sb_bytecode_process failed!\n"); - } - - kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, - kernel->bc.ndw * 4); - p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE); - memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4); - ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf); - } + struct r600_kernel *kernel = &shader->kernels[pc]; + (void)use_kill; + if (!kernel->code_bo) { + void *p; + struct r600_bytecode *bc = &kernel->bc; + LLVMModuleRef mod = kernel->llvm_module; + boolean use_kill = false; + bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0; + unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS; + unsigned sb_disasm = use_sb || + (ctx->screen->b.debug_flags & DBG_SB_DISASM); + + r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family, + ctx->screen->has_compressed_msaa_texturing); + bc->type = TGSI_PROCESSOR_COMPUTE; + bc->isa = ctx->isa; + r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump); + + if (dump && !sb_disasm) { + r600_bytecode_disasm(bc); + } else if ((dump && sb_disasm) || use_sb) { + if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb)) + R600_ERR("r600_sb_bytecode_process failed!\n"); + } + + kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, + kernel->bc.ndw * 4); + p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE); + memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4); + ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf); + } #endif shader->active_kernel = kernel; ctx->cs_shader_state.kernel_index = pc; +#else + ctx->cs_shader_state.pc = pc; + /* Get the config information for this kernel. */ + r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill); +#endif + + COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc); + + evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); compute_emit_cs(ctx, block_layout, grid_layout); } |