From 5ab25bb4ba429a866c2e36bd543bf0405047e325 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 17 Oct 2016 12:30:42 +0200 Subject: radeonsi: import all TGSI->LLVM code from gallium/radeon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Nicolai Hähnle Reviewed-by: Emil Velikov Acked-by: Edward O'Callaghan --- .../drivers/r600/evergreen_compute_internal.h | 1 - src/gallium/drivers/radeon/Makefile.sources | 6 +- src/gallium/drivers/radeon/radeon_llvm.h | 151 --- src/gallium/drivers/radeon/radeon_llvm_emit.c | 241 ---- src/gallium/drivers/radeon/radeon_llvm_emit.h | 46 - .../drivers/radeon/radeon_setup_tgsi_llvm.c | 1182 ----------------- src/gallium/drivers/radeonsi/Makefile.sources | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 2 - src/gallium/drivers/radeonsi/si_shader_internal.h | 132 ++ src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 1 - .../drivers/radeonsi/si_shader_tgsi_setup.c | 1379 ++++++++++++++++++++ 12 files changed, 1514 insertions(+), 1630 deletions(-) delete mode 100644 src/gallium/drivers/radeon/radeon_llvm.h delete mode 100644 src/gallium/drivers/radeon/radeon_llvm_emit.c delete mode 100644 src/gallium/drivers/radeon/radeon_llvm_emit.h delete mode 100644 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c create mode 100644 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h index e6ff7609aea..34d96f6d239 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.h +++ b/src/gallium/drivers/r600/evergreen_compute_internal.h @@ -27,7 +27,6 @@ #include "r600_asm.h" #ifdef HAVE_OPENCL -#include "radeon/radeon_llvm.h" #include #endif diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index 049da60c134..3e13dae3cbb 100644 --- 
a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -25,8 +25,4 @@ C_SOURCES := \ LLVM_C_FILES := \ radeon_elf_util.c \ - radeon_elf_util.h \ - radeon_llvm_emit.c \ - radeon_llvm_emit.h \ - radeon_llvm.h \ - radeon_setup_tgsi_llvm.c + radeon_elf_util.h diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h deleted file mode 100644 index b4b968b26b7..00000000000 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright 2011 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: Tom Stellard - * - */ - -#ifndef RADEON_LLVM_H -#define RADEON_LLVM_H - -#include -#include "gallivm/lp_bld_init.h" -#include "gallivm/lp_bld_tgsi.h" -#include "tgsi/tgsi_parse.h" - -#define RADEON_LLVM_MAX_INPUT_SLOTS 32 -#define RADEON_LLVM_MAX_INPUTS 32 * 4 -#define RADEON_LLVM_MAX_OUTPUTS 32 * 4 - -#define RADEON_LLVM_INITIAL_CF_DEPTH 4 - -#define RADEON_LLVM_MAX_SYSTEM_VALUES 4 - -struct radeon_llvm_flow; - -struct radeon_llvm_context { - struct lp_build_tgsi_soa_context soa; - - /*=== Front end configuration ===*/ - - /* Instructions that are not described by any of the TGSI opcodes. */ - - /** This function is responsible for initilizing the inputs array and will be - * called once for each input declared in the TGSI shader. - */ - void (*load_input)(struct radeon_llvm_context *, - unsigned input_index, - const struct tgsi_full_declaration *decl, - LLVMValueRef out[4]); - - void (*load_system_value)(struct radeon_llvm_context *, - unsigned index, - const struct tgsi_full_declaration *decl); - - void (*declare_memory_region)(struct radeon_llvm_context *, - const struct tgsi_full_declaration *decl); - - /** This array contains the input values for the shader. Typically these - * values will be in the form of a target intrinsic that will inform the - * backend how to load the actual inputs to the shader. - */ - struct tgsi_full_declaration input_decls[RADEON_LLVM_MAX_INPUT_SLOTS]; - LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS]; - LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS]; - - /** This pointer is used to contain the temporary values. - * The amount of temporary used in tgsi can't be bound to a max value and - * thus we must allocate this array at runtime. 
- */ - LLVMValueRef *temps; - unsigned temps_count; - LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES]; - - /*=== Private Members ===*/ - - struct radeon_llvm_flow *flow; - unsigned flow_depth; - unsigned flow_depth_max; - - struct tgsi_array_info *temp_arrays; - LLVMValueRef *temp_array_allocas; - - LLVMValueRef undef_alloca; - - LLVMValueRef main_fn; - LLVMTypeRef return_type; - - unsigned fpmath_md_kind; - LLVMValueRef fpmath_md_2p5_ulp; - - struct gallivm_state gallivm; -}; - -LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, - enum tgsi_opcode_type type); - -LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, - enum tgsi_opcode_type type, LLVMValueRef value); - -LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx, - LLVMValueRef index, - unsigned num); - -void radeon_llvm_context_init(struct radeon_llvm_context *ctx, - const char *triple, - const struct tgsi_shader_info *info, - const struct tgsi_token *tokens); - -void radeon_llvm_create_func(struct radeon_llvm_context *ctx, - LLVMTypeRef *return_types, unsigned num_return_elems, - LLVMTypeRef *ParamTypes, unsigned ParamCount); - -void radeon_llvm_dispose(struct radeon_llvm_context *ctx); - -void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx, - bool run_verifier); - -LLVMValueRef radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, - enum tgsi_opcode_type type, - LLVMValueRef ptr, - LLVMValueRef ptr2); - -LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base, - LLVMValueRef value); - -LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, - unsigned swizzle); - -void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_instruction *inst, - const struct tgsi_opcode_info *info, - LLVMValueRef dst[4]); - -static inline struct radeon_llvm_context * -radeon_llvm_context(struct 
lp_build_tgsi_context *bld_base) -{ - return (struct radeon_llvm_context*)bld_base; -} - -#endif /* RADEON_LLVM_H */ diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c deleted file mode 100644 index 9611f15103c..00000000000 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright 2011 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: Tom Stellard - * - */ - -#include "radeon_llvm_emit.h" -#include "radeon_elf_util.h" -#include "c11/threads.h" -#include "gallivm/lp_bld_misc.h" -#include "util/u_debug.h" -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_state.h" - -#include -#include -#include - -#include -#include -#include - -#define CPU_STRING_LEN 30 -#define FS_STRING_LEN 30 -#define TRIPLE_STRING_LEN 7 - -/** - * Shader types for the LLVM backend. - */ -enum radeon_llvm_shader_type { - RADEON_LLVM_SHADER_PS = 0, - RADEON_LLVM_SHADER_VS = 1, - RADEON_LLVM_SHADER_GS = 2, - RADEON_LLVM_SHADER_CS = 3, -}; - -enum radeon_llvm_calling_convention { - RADEON_LLVM_AMDGPU_VS = 87, - RADEON_LLVM_AMDGPU_GS = 88, - RADEON_LLVM_AMDGPU_PS = 89, - RADEON_LLVM_AMDGPU_CS = 90, -}; - -void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value) -{ - char str[16]; - - snprintf(str, sizeof(str), "%i", value); - LLVMAddTargetDependentFunctionAttr(F, name, str); -} - -/** - * Set the shader type we want to compile - * - * @param type shader type to set - */ -void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) -{ - enum radeon_llvm_shader_type llvm_type; - enum radeon_llvm_calling_convention calling_conv; - - switch (type) { - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_TESS_CTRL: - case PIPE_SHADER_TESS_EVAL: - llvm_type = RADEON_LLVM_SHADER_VS; - calling_conv = RADEON_LLVM_AMDGPU_VS; - break; - case PIPE_SHADER_GEOMETRY: - llvm_type = RADEON_LLVM_SHADER_GS; - calling_conv = RADEON_LLVM_AMDGPU_GS; - break; - case PIPE_SHADER_FRAGMENT: - llvm_type = RADEON_LLVM_SHADER_PS; - calling_conv = RADEON_LLVM_AMDGPU_PS; - break; - case PIPE_SHADER_COMPUTE: - llvm_type = RADEON_LLVM_SHADER_CS; - calling_conv = RADEON_LLVM_AMDGPU_CS; - break; - default: - unreachable("Unhandle shader type"); - } - - if (HAVE_LLVM >= 0x309) - LLVMSetFunctionCallConv(F, calling_conv); - else - radeon_llvm_add_attribute(F, "ShaderType", llvm_type); -} - -static void 
init_r600_target() -{ - gallivm_init_llvm_targets(); -#if HAVE_LLVM < 0x0307 - LLVMInitializeR600TargetInfo(); - LLVMInitializeR600Target(); - LLVMInitializeR600TargetMC(); - LLVMInitializeR600AsmPrinter(); -#else - LLVMInitializeAMDGPUTargetInfo(); - LLVMInitializeAMDGPUTarget(); - LLVMInitializeAMDGPUTargetMC(); - LLVMInitializeAMDGPUAsmPrinter(); - -#endif -} - -static once_flag init_r600_target_once_flag = ONCE_FLAG_INIT; - -LLVMTargetRef radeon_llvm_get_r600_target(const char *triple) -{ - LLVMTargetRef target = NULL; - char *err_message = NULL; - - call_once(&init_r600_target_once_flag, init_r600_target); - - if (LLVMGetTargetFromTriple(triple, &target, &err_message)) { - fprintf(stderr, "Cannot find target for triple %s ", triple); - if (err_message) { - fprintf(stderr, "%s\n", err_message); - } - LLVMDisposeMessage(err_message); - return NULL; - } - return target; -} - -struct radeon_llvm_diagnostics { - struct pipe_debug_callback *debug; - unsigned retval; -}; - -static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) -{ - struct radeon_llvm_diagnostics *diag = (struct radeon_llvm_diagnostics *)context; - LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); - char *description = LLVMGetDiagInfoDescription(di); - const char *severity_str = NULL; - - switch (severity) { - case LLVMDSError: - severity_str = "error"; - break; - case LLVMDSWarning: - severity_str = "warning"; - break; - case LLVMDSRemark: - severity_str = "remark"; - break; - case LLVMDSNote: - severity_str = "note"; - break; - default: - severity_str = "unknown"; - } - - pipe_debug_message(diag->debug, SHADER_INFO, - "LLVM diagnostic (%s): %s", severity_str, description); - - if (severity == LLVMDSError) { - diag->retval = 1; - fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description); - } - - LLVMDisposeMessage(description); -} - -/** - * Compile an LLVM module to machine code. 
- * - * @returns 0 for success, 1 for failure - */ -unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary, - LLVMTargetMachineRef tm, - struct pipe_debug_callback *debug) -{ - struct radeon_llvm_diagnostics diag; - char *err; - LLVMContextRef llvm_ctx; - LLVMMemoryBufferRef out_buffer; - unsigned buffer_size; - const char *buffer_data; - LLVMBool mem_err; - - diag.debug = debug; - diag.retval = 0; - - /* Setup Diagnostic Handler*/ - llvm_ctx = LLVMGetModuleContext(M); - - LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &diag); - - /* Compile IR*/ - mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err, - &out_buffer); - - /* Process Errors/Warnings */ - if (mem_err) { - fprintf(stderr, "%s: %s", __FUNCTION__, err); - pipe_debug_message(debug, SHADER_INFO, - "LLVM emit error: %s", err); - FREE(err); - diag.retval = 1; - goto out; - } - - /* Extract Shader Code*/ - buffer_size = LLVMGetBufferSize(out_buffer); - buffer_data = LLVMGetBufferStart(out_buffer); - - radeon_elf_read(buffer_data, buffer_size, binary); - - /* Clean up */ - LLVMDisposeMemoryBuffer(out_buffer); - -out: - if (diag.retval != 0) - pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed"); - return diag.retval; -} diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h b/src/gallium/drivers/radeon/radeon_llvm_emit.h deleted file mode 100644 index 677978b4758..00000000000 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2012 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: Tom Stellard - * - */ - -#ifndef RADEON_LLVM_EMIT_H -#define RADEON_LLVM_EMIT_H - -#include -#include -#include - -struct pipe_debug_callback; -struct radeon_shader_binary; - -void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value); -void radeon_llvm_shader_type(LLVMValueRef F, unsigned type); - -LLVMTargetRef radeon_llvm_get_r600_target(const char *triple); - -unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary, - LLVMTargetMachineRef tm, - struct pipe_debug_callback *debug); - -#endif /* RADEON_LLVM_EMIT_H */ diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c deleted file mode 100644 index c06eb3eba70..00000000000 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ /dev/null @@ -1,1182 +0,0 @@ -/* - * Copyright 2011 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: Tom Stellard - * - */ -#include "radeon_llvm.h" - -#include "gallivm/lp_bld_const.h" -#include "gallivm/lp_bld_gather.h" -#include "gallivm/lp_bld_flow.h" -#include "gallivm/lp_bld_init.h" -#include "gallivm/lp_bld_intr.h" -#include "gallivm/lp_bld_misc.h" -#include "gallivm/lp_bld_swizzle.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_parse.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/u_debug.h" - -#include -#include -#include - -/* Data for if/else/endif and bgnloop/endloop control flow structures. - */ -struct radeon_llvm_flow { - /* Loop exit or next part of if/else/endif. */ - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef loop_entry_block; -}; - -LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, - enum tgsi_opcode_type type) -{ - LLVMContextRef ctx = bld_base->base.gallivm->context; - - switch (type) { - case TGSI_TYPE_UNSIGNED: - case TGSI_TYPE_SIGNED: - return LLVMInt32TypeInContext(ctx); - case TGSI_TYPE_UNSIGNED64: - case TGSI_TYPE_SIGNED64: - return LLVMInt64TypeInContext(ctx); - case TGSI_TYPE_DOUBLE: - return LLVMDoubleTypeInContext(ctx); - case TGSI_TYPE_UNTYPED: - case TGSI_TYPE_FLOAT: - return LLVMFloatTypeInContext(ctx); - default: break; - } - return 0; -} - -LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, - enum tgsi_opcode_type type, LLVMValueRef value) -{ - LLVMBuilderRef builder = bld_base->base.gallivm->builder; - LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type); - - if (dst_type) - return LLVMBuildBitCast(builder, value, dst_type, ""); - else - return value; -} - -/** - * Return a value that is equal to the given i32 \p index if it lies in [0,num) - * or an undefined value in the same 
interval otherwise. - */ -LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx, - LLVMValueRef index, - unsigned num) -{ - struct gallivm_state *gallivm = &ctx->gallivm; - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1); - LLVMValueRef cc; - - if (util_is_power_of_two(num)) { - index = LLVMBuildAnd(builder, index, c_max, ""); - } else { - /* In theory, this MAX pattern should result in code that is - * as good as the bit-wise AND above. - * - * In practice, LLVM generates worse code (at the time of - * writing), because its value tracking is not strong enough. - */ - cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, ""); - index = LLVMBuildSelect(builder, cc, index, c_max, ""); - } - - return index; -} - -static struct radeon_llvm_flow * -get_current_flow(struct radeon_llvm_context *ctx) -{ - if (ctx->flow_depth > 0) - return &ctx->flow[ctx->flow_depth - 1]; - return NULL; -} - -static struct radeon_llvm_flow * -get_innermost_loop(struct radeon_llvm_context *ctx) -{ - for (unsigned i = ctx->flow_depth; i > 0; --i) { - if (ctx->flow[i - 1].loop_entry_block) - return &ctx->flow[i - 1]; - } - return NULL; -} - -static struct radeon_llvm_flow * -push_flow(struct radeon_llvm_context *ctx) -{ - struct radeon_llvm_flow *flow; - - if (ctx->flow_depth >= ctx->flow_depth_max) { - unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH); - ctx->flow = REALLOC(ctx->flow, - ctx->flow_depth_max * sizeof(*ctx->flow), - new_max * sizeof(*ctx->flow)); - ctx->flow_depth_max = new_max; - } - - flow = &ctx->flow[ctx->flow_depth]; - ctx->flow_depth++; - - flow->next_block = NULL; - flow->loop_entry_block = NULL; - return flow; -} - -static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base, - LLVMValueRef value, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w) -{ - LLVMValueRef swizzles[4]; - LLVMTypeRef i32t = - 
LLVMInt32TypeInContext(bld_base->base.gallivm->context); - - swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0); - swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0); - swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0); - swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0); - - return LLVMBuildShuffleVector(bld_base->base.gallivm->builder, - value, - LLVMGetUndef(LLVMTypeOf(value)), - LLVMConstVector(swizzles, 4), ""); -} - -/** - * Return the description of the array covering the given temporary register - * index. - */ -static unsigned -get_temp_array_id(struct lp_build_tgsi_context *bld_base, - unsigned reg_index, - const struct tgsi_ind_register *reg) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - unsigned num_arrays = ctx->soa.bld_base.info->array_max[TGSI_FILE_TEMPORARY]; - unsigned i; - - if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays) - return reg->ArrayID; - - for (i = 0; i < num_arrays; i++) { - const struct tgsi_array_info *array = &ctx->temp_arrays[i]; - - if (reg_index >= array->range.First && reg_index <= array->range.Last) - return i + 1; - } - - return 0; -} - -static struct tgsi_declaration_range -get_array_range(struct lp_build_tgsi_context *bld_base, - unsigned File, unsigned reg_index, - const struct tgsi_ind_register *reg) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct tgsi_declaration_range range; - - if (File == TGSI_FILE_TEMPORARY) { - unsigned array_id = get_temp_array_id(bld_base, reg_index, reg); - if (array_id) - return ctx->temp_arrays[array_id - 1].range; - } - - range.First = 0; - range.Last = bld_base->info->file_max[File]; - return range; -} - -static LLVMValueRef -emit_array_index(struct lp_build_tgsi_soa_context *bld, - const struct tgsi_ind_register *reg, - unsigned offset) -{ - struct gallivm_state *gallivm = bld->bld_base.base.gallivm; - - if (!reg) { - return lp_build_const_int32(gallivm, offset); - } - LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, 
bld->addr[reg->Index][reg->Swizzle], ""); - return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), ""); -} - -/** - * For indirect registers, construct a pointer directly to the requested - * element using getelementptr if possible. - * - * Returns NULL if the insertelement/extractelement fallback for array access - * must be used. - */ -static LLVMValueRef -get_pointer_into_array(struct radeon_llvm_context *ctx, - unsigned file, - unsigned swizzle, - unsigned reg_index, - const struct tgsi_ind_register *reg_indirect) -{ - unsigned array_id; - struct tgsi_array_info *array; - struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef idxs[2]; - LLVMValueRef index; - LLVMValueRef alloca; - - if (file != TGSI_FILE_TEMPORARY) - return NULL; - - array_id = get_temp_array_id(&ctx->soa.bld_base, reg_index, reg_indirect); - if (!array_id) - return NULL; - - alloca = ctx->temp_array_allocas[array_id - 1]; - if (!alloca) - return NULL; - - array = &ctx->temp_arrays[array_id - 1]; - - if (!(array->writemask & (1 << swizzle))) - return ctx->undef_alloca; - - index = emit_array_index(&ctx->soa, reg_indirect, - reg_index - ctx->temp_arrays[array_id - 1].range.First); - - /* Ensure that the index is within a valid range, to guard against - * VM faults and overwriting critical data (e.g. spilled resource - * descriptors). - * - * TODO It should be possible to avoid the additional instructions - * if LLVM is changed so that it guarantuees: - * 1. the scratch space descriptor isolates the current wave (this - * could even save the scratch offset SGPR at the cost of an - * additional SALU instruction) - * 2. 
the memory for allocas must be allocated at the _end_ of the - * scratch space (after spilled registers) - */ - index = radeon_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1); - - index = LLVMBuildMul( - builder, index, - lp_build_const_int32(gallivm, util_bitcount(array->writemask)), - ""); - index = LLVMBuildAdd( - builder, index, - lp_build_const_int32( - gallivm, - util_bitcount(array->writemask & ((1 << swizzle) - 1))), - ""); - idxs[0] = ctx->soa.bld_base.uint_bld.zero; - idxs[1] = index; - return LLVMBuildGEP(builder, alloca, idxs, 2, ""); -} - -LLVMValueRef -radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, - enum tgsi_opcode_type type, - LLVMValueRef ptr, - LLVMValueRef ptr2) -{ - LLVMBuilderRef builder = bld_base->base.gallivm->builder; - LLVMValueRef result; - - result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2)); - - result = LLVMBuildInsertElement(builder, - result, - bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr), - bld_base->int_bld.zero, ""); - result = LLVMBuildInsertElement(builder, - result, - bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2), - bld_base->int_bld.one, ""); - return bitcast(bld_base, type, result); -} - -static LLVMValueRef -emit_array_fetch(struct lp_build_tgsi_context *bld_base, - unsigned File, enum tgsi_opcode_type type, - struct tgsi_declaration_range range, - unsigned swizzle) -{ - struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); - struct gallivm_state *gallivm = bld->bld_base.base.gallivm; - LLVMBuilderRef builder = bld_base->base.gallivm->builder; - - unsigned i, size = range.Last - range.First + 1; - LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size); - LLVMValueRef result = LLVMGetUndef(vec); - - struct tgsi_full_src_register tmp_reg = {}; - tmp_reg.Register.File = File; - - for (i = 0; i < size; ++i) { - tmp_reg.Register.Index = i + range.First; - LLVMValueRef temp = 
radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle); - result = LLVMBuildInsertElement(builder, result, temp, - lp_build_const_int32(gallivm, i), "array_vector"); - } - return result; -} - -static LLVMValueRef -load_value_from_array(struct lp_build_tgsi_context *bld_base, - unsigned file, - enum tgsi_opcode_type type, - unsigned swizzle, - unsigned reg_index, - const struct tgsi_ind_register *reg_indirect) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ptr; - - ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect); - if (ptr) { - LLVMValueRef val = LLVMBuildLoad(builder, ptr, ""); - if (tgsi_type_is_64bit(type)) { - LLVMValueRef ptr_hi, val_hi; - ptr_hi = LLVMBuildGEP(builder, ptr, &bld_base->uint_bld.one, 1, ""); - val_hi = LLVMBuildLoad(builder, ptr_hi, ""); - val = radeon_llvm_emit_fetch_64bit(bld_base, type, val, val_hi); - } - - return val; - } else { - struct tgsi_declaration_range range = - get_array_range(bld_base, file, reg_index, reg_indirect); - LLVMValueRef index = - emit_array_index(bld, reg_indirect, reg_index - range.First); - LLVMValueRef array = - emit_array_fetch(bld_base, file, type, range, swizzle); - return LLVMBuildExtractElement(builder, array, index, ""); - } -} - -static void -store_value_to_array(struct lp_build_tgsi_context *bld_base, - LLVMValueRef value, - unsigned file, - unsigned chan_index, - unsigned reg_index, - const struct tgsi_ind_register *reg_indirect) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ptr; - - ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect); - if 
(ptr) { - LLVMBuildStore(builder, value, ptr); - } else { - unsigned i, size; - struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect); - LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - range.First); - LLVMValueRef array = - emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index); - LLVMValueRef temp_ptr; - - array = LLVMBuildInsertElement(builder, array, value, index, ""); - - size = range.Last - range.First + 1; - for (i = 0; i < size; ++i) { - switch(file) { - case TGSI_FILE_OUTPUT: - temp_ptr = bld->outputs[i + range.First][chan_index]; - break; - - case TGSI_FILE_TEMPORARY: - if (range.First + i >= ctx->temps_count) - continue; - temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index]; - break; - - default: - continue; - } - value = LLVMBuildExtractElement(builder, array, - lp_build_const_int32(gallivm, i), ""); - LLVMBuildStore(builder, value, temp_ptr); - } - } -} - -LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, - unsigned swizzle) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); - LLVMBuilderRef builder = bld_base->base.gallivm->builder; - LLVMValueRef result = NULL, ptr, ptr2; - - if (swizzle == ~0) { - LLVMValueRef values[TGSI_NUM_CHANNELS]; - unsigned chan; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan); - } - return lp_build_gather_values(bld_base->base.gallivm, values, - TGSI_NUM_CHANNELS); - } - - if (reg->Register.Indirect) { - LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type, - swizzle, reg->Register.Index, ®->Indirect); - return bitcast(bld_base, type, load); - } - - switch(reg->Register.File) { - case TGSI_FILE_IMMEDIATE: { - LLVMTypeRef ctype = tgsi2llvmtype(bld_base, 
type); - if (tgsi_type_is_64bit(type)) { - result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2)); - result = LLVMConstInsertElement(result, - bld->immediates[reg->Register.Index][swizzle], - bld_base->int_bld.zero); - result = LLVMConstInsertElement(result, - bld->immediates[reg->Register.Index][swizzle + 1], - bld_base->int_bld.one); - return LLVMConstBitCast(result, ctype); - } else { - return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); - } - } - - case TGSI_FILE_INPUT: { - unsigned index = reg->Register.Index; - LLVMValueRef input[4]; - - /* I don't think doing this for vertex shaders is beneficial. - * For those, we want to make sure the VMEM loads are executed - * only once. Fragment shaders don't care much, because - * v_interp instructions are much cheaper than VMEM loads. - */ - if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT) - ctx->load_input(ctx, index, &ctx->input_decls[index], input); - else - memcpy(input, &ctx->inputs[index * 4], sizeof(input)); - - result = input[swizzle]; - - if (tgsi_type_is_64bit(type)) { - ptr = result; - ptr2 = input[swizzle + 1]; - return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2); - } - break; - } - - case TGSI_FILE_TEMPORARY: - if (reg->Register.Index >= ctx->temps_count) - return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); - ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle]; - if (tgsi_type_is_64bit(type)) { - ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1]; - return radeon_llvm_emit_fetch_64bit(bld_base, type, - LLVMBuildLoad(builder, ptr, ""), - LLVMBuildLoad(builder, ptr2, "")); - } - result = LLVMBuildLoad(builder, ptr, ""); - break; - - case TGSI_FILE_OUTPUT: - ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle); - if (tgsi_type_is_64bit(type)) { - ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1); - return 
radeon_llvm_emit_fetch_64bit(bld_base, type, - LLVMBuildLoad(builder, ptr, ""), - LLVMBuildLoad(builder, ptr2, "")); - } - result = LLVMBuildLoad(builder, ptr, ""); - break; - - default: - return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); - } - - return bitcast(bld_base, type, result); -} - -static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, - unsigned swizzle) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - - LLVMValueRef cval = ctx->system_values[reg->Register.Index]; - if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) { - cval = LLVMBuildExtractElement(gallivm->builder, cval, - lp_build_const_int32(gallivm, swizzle), ""); - } - return bitcast(bld_base, type, cval); -} - -static void emit_declaration(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_declaration *decl) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - LLVMBuilderRef builder = bld_base->base.gallivm->builder; - unsigned first, last, i; - switch(decl->Declaration.File) { - case TGSI_FILE_ADDRESS: - { - unsigned idx; - for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { - unsigned chan; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - ctx->soa.addr[idx][chan] = lp_build_alloca_undef( - &ctx->gallivm, - ctx->soa.bld_base.uint_bld.elem_type, ""); - } - } - break; - } - - case TGSI_FILE_TEMPORARY: - { - char name[16] = ""; - LLVMValueRef array_alloca = NULL; - unsigned decl_size; - unsigned writemask = decl->Declaration.UsageMask; - first = decl->Range.First; - last = decl->Range.Last; - decl_size = 4 * ((last - first) + 1); - - if (decl->Declaration.Array) { - unsigned id = decl->Array.ArrayID - 1; - unsigned array_size; - - writemask &= ctx->temp_arrays[id].writemask; - ctx->temp_arrays[id].writemask = writemask; - array_size = ((last - first) + 1) * 
util_bitcount(writemask); - - /* If the array has more than 16 elements, store it - * in memory using an alloca that spans the entire - * array. - * - * Otherwise, store each array element individually. - * We will then generate vectors (per-channel, up to - * <16 x float> if the usagemask is a single bit) for - * indirect addressing. - * - * Note that 16 is the number of vector elements that - * LLVM will store in a register, so theoretically an - * array with up to 4 * 16 = 64 elements could be - * handled this way, but whether that's a good idea - * depends on VGPR register pressure elsewhere. - * - * FIXME: We shouldn't need to have the non-alloca - * code path for arrays. LLVM should be smart enough to - * promote allocas into registers when profitable. - * - * LLVM 3.8 crashes with this. - */ - if (HAVE_LLVM >= 0x0309 && array_size > 16) { - array_alloca = LLVMBuildAlloca(builder, - LLVMArrayType(bld_base->base.vec_type, - array_size), "array"); - ctx->temp_array_allocas[id] = array_alloca; - } - } - - if (!ctx->temps_count) { - ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1; - ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef)); - } - if (!array_alloca) { - for (i = 0; i < decl_size; ++i) { -#ifdef DEBUG - snprintf(name, sizeof(name), "TEMP%d.%c", - first + i / 4, "xyzw"[i % 4]); -#endif - ctx->temps[first * TGSI_NUM_CHANNELS + i] = - lp_build_alloca_undef(bld_base->base.gallivm, - bld_base->base.vec_type, - name); - } - } else { - LLVMValueRef idxs[2] = { - bld_base->uint_bld.zero, - NULL - }; - unsigned j = 0; - - if (writemask != TGSI_WRITEMASK_XYZW && - !ctx->undef_alloca) { - /* Create a dummy alloca. We use it so that we - * have a pointer that is safe to load from if - * a shader ever reads from a channel that - * it never writes to. 
- */ - ctx->undef_alloca = lp_build_alloca_undef( - bld_base->base.gallivm, - bld_base->base.vec_type, "undef"); - } - - for (i = 0; i < decl_size; ++i) { - LLVMValueRef ptr; - if (writemask & (1 << (i % 4))) { -#ifdef DEBUG - snprintf(name, sizeof(name), "TEMP%d.%c", - first + i / 4, "xyzw"[i % 4]); -#endif - idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j); - ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name); - j++; - } else { - ptr = ctx->undef_alloca; - } - ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr; - } - } - break; - } - case TGSI_FILE_INPUT: - { - unsigned idx; - for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { - if (ctx->load_input && - ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) { - ctx->input_decls[idx] = *decl; - - if (bld_base->info->processor != PIPE_SHADER_FRAGMENT) - ctx->load_input(ctx, idx, decl, - &ctx->inputs[idx * 4]); - } - } - } - break; - - case TGSI_FILE_SYSTEM_VALUE: - { - unsigned idx; - for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { - ctx->load_system_value(ctx, idx, decl); - } - } - break; - - case TGSI_FILE_OUTPUT: - { - char name[16] = ""; - unsigned idx; - for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { - unsigned chan; - assert(idx < RADEON_LLVM_MAX_OUTPUTS); - if (ctx->soa.outputs[idx][0]) - continue; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { -#ifdef DEBUG - snprintf(name, sizeof(name), "OUT%d.%c", - idx, "xyzw"[chan % 4]); -#endif - ctx->soa.outputs[idx][chan] = lp_build_alloca_undef( - &ctx->gallivm, - ctx->soa.bld_base.base.elem_type, name); - } - } - break; - } - - case TGSI_FILE_MEMORY: - ctx->declare_memory_region(ctx, decl); - break; - - default: - break; - } -} - -LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base, - LLVMValueRef value) -{ - struct lp_build_emit_data clamp_emit_data; - - memset(&clamp_emit_data, 0, sizeof(clamp_emit_data)); - clamp_emit_data.arg_count = 3; - clamp_emit_data.args[0] = value; - 
clamp_emit_data.args[2] = bld_base->base.one; - clamp_emit_data.args[1] = bld_base->base.zero; - - return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP, - &clamp_emit_data); -} - -void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_instruction *inst, - const struct tgsi_opcode_info *info, - LLVMValueRef dst[4]) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); - struct gallivm_state *gallivm = bld->bld_base.base.gallivm; - const struct tgsi_full_dst_register *reg = &inst->Dst[0]; - LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; - LLVMValueRef temp_ptr, temp_ptr2 = NULL; - unsigned chan, chan_index; - bool is_vec_store = false; - enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); - - if (dst[0]) { - LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0])); - is_vec_store = (k == LLVMVectorTypeKind); - } - - if (is_vec_store) { - LLVMValueRef values[4] = {}; - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) { - LLVMValueRef index = lp_build_const_int32(gallivm, chan); - values[chan] = LLVMBuildExtractElement(gallivm->builder, - dst[0], index, ""); - } - bld_base->emit_store(bld_base, inst, info, values); - return; - } - - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - LLVMValueRef value = dst[chan_index]; - - if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3)) - continue; - if (inst->Instruction.Saturate) - value = radeon_llvm_saturate(bld_base, value); - - if (reg->Register.File == TGSI_FILE_ADDRESS) { - temp_ptr = bld->addr[reg->Register.Index][chan_index]; - LLVMBuildStore(builder, value, temp_ptr); - continue; - } - - if (!tgsi_type_is_64bit(dtype)) - value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); - - if (reg->Register.Indirect) { - unsigned file = reg->Register.File; - unsigned reg_index = reg->Register.Index; - store_value_to_array(bld_base, value, file, 
chan_index, - reg_index, &reg->Indirect); - } else { - switch(reg->Register.File) { - case TGSI_FILE_OUTPUT: - temp_ptr = bld->outputs[reg->Register.Index][chan_index]; - if (tgsi_type_is_64bit(dtype)) - temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1]; - break; - - case TGSI_FILE_TEMPORARY: - { - if (reg->Register.Index >= ctx->temps_count) - continue; - - temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index]; - if (tgsi_type_is_64bit(dtype)) - temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1]; - - break; - } - default: - return; - } - if (!tgsi_type_is_64bit(dtype)) - LLVMBuildStore(builder, value, temp_ptr); - else { - LLVMValueRef ptr = LLVMBuildBitCast(builder, value, - LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), ""); - LLVMValueRef val2; - value = LLVMBuildExtractElement(builder, ptr, - bld_base->uint_bld.zero, ""); - val2 = LLVMBuildExtractElement(builder, ptr, - bld_base->uint_bld.one, ""); - - LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr); - LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2); - } - } - } -} - -static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc) -{ - char buf[32]; - /* Subtract 1 so that the number shown is that of the corresponding - * opcode in the TGSI dump, e.g. an if block has the same suffix as - * the instruction number of the corresponding TGSI IF. - */ - snprintf(buf, sizeof(buf), "%s%d", base, pc - 1); - LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf); -} - -/* Append a basic block at the level of the parent flow.
- */ -static LLVMBasicBlockRef append_basic_block(struct radeon_llvm_context *ctx, - const char *name) -{ - struct gallivm_state *gallivm = &ctx->gallivm; - - assert(ctx->flow_depth >= 1); - - if (ctx->flow_depth >= 2) { - struct radeon_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2]; - - return LLVMInsertBasicBlockInContext(gallivm->context, - flow->next_block, name); - } - - return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name); -} - -/* Emit a branch to the given default target for the current block if - * applicable -- that is, if the current block does not already contain a - * branch from a break or continue. - */ -static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target) -{ - if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder))) - LLVMBuildBr(builder, target); -} - -static void bgnloop_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - struct radeon_llvm_flow *flow = push_flow(ctx); - flow->loop_entry_block = append_basic_block(ctx, "LOOP"); - flow->next_block = append_basic_block(ctx, "ENDLOOP"); - set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc); - LLVMBuildBr(gallivm->builder, flow->loop_entry_block); - LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block); -} - -static void brk_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - struct radeon_llvm_flow *flow = get_innermost_loop(ctx); - - LLVMBuildBr(gallivm->builder, flow->next_block); -} - -static void cont_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct 
lp_build_emit_data *emit_data) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - struct radeon_llvm_flow *flow = get_innermost_loop(ctx); - - LLVMBuildBr(gallivm->builder, flow->loop_entry_block); -} - -static void else_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - struct radeon_llvm_flow *current_branch = get_current_flow(ctx); - LLVMBasicBlockRef endif_block; - - assert(!current_branch->loop_entry_block); - - endif_block = append_basic_block(ctx, "ENDIF"); - emit_default_branch(gallivm->builder, endif_block); - - LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block); - set_basicblock_name(current_branch->next_block, "else", bld_base->pc); - - current_branch->next_block = endif_block; -} - -static void endif_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - struct radeon_llvm_flow *current_branch = get_current_flow(ctx); - - assert(!current_branch->loop_entry_block); - - emit_default_branch(gallivm->builder, current_branch->next_block); - LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block); - set_basicblock_name(current_branch->next_block, "endif", bld_base->pc); - - ctx->flow_depth--; -} - -static void endloop_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - struct radeon_llvm_flow *current_loop = get_current_flow(ctx); - - 
assert(current_loop->loop_entry_block); - - emit_default_branch(gallivm->builder, current_loop->loop_entry_block); - - LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block); - set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc); - ctx->flow_depth--; -} - -static void if_cond_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data, - LLVMValueRef cond) -{ - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - struct radeon_llvm_flow *flow = push_flow(ctx); - LLVMBasicBlockRef if_block; - - if_block = append_basic_block(ctx, "IF"); - flow->next_block = append_basic_block(ctx, "ELSE"); - set_basicblock_name(if_block, "if", bld_base->pc); - LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block); - LLVMPositionBuilderAtEnd(gallivm->builder, if_block); -} - -static void if_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMValueRef cond; - - cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE, - emit_data->args[0], - bld_base->base.zero, ""); - - if_cond_emit(action, bld_base, emit_data, cond); -} - -static void uif_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMValueRef cond; - - cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, - bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]), - bld_base->int_bld.zero, ""); - - if_cond_emit(action, bld_base, emit_data, cond); -} - -static void emit_immediate(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_immediate *imm) -{ - unsigned i; - struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); - - for (i = 0; i < 4; ++i) 
{ - ctx->soa.immediates[ctx->soa.num_immediates][i] = - LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false ); - } - - ctx->soa.num_immediates++; -} - -void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple, - const struct tgsi_shader_info *info, - const struct tgsi_token *tokens) -{ - struct lp_type type; - - /* Initialize the gallivm object: - * We are only using the module, context, and builder fields of this struct. - * This should be enough for us to be able to pass our gallivm struct to the - * helper functions in the gallivm module. - */ - memset(&ctx->gallivm, 0, sizeof (ctx->gallivm)); - memset(&ctx->soa, 0, sizeof(ctx->soa)); - ctx->gallivm.context = LLVMContextCreate(); - ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi", - ctx->gallivm.context); - LLVMSetTarget(ctx->gallivm.module, triple); - ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context); - - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; - - bld_base->info = info; - - if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) { - int size = info->array_max[TGSI_FILE_TEMPORARY]; - - ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0])); - ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0])); - - if (tokens) - tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size, - ctx->temp_arrays); - } - - type.floating = true; - type.fixed = false; - type.sign = true; - type.norm = false; - type.width = 32; - type.length = 1; - - lp_build_context_init(&bld_base->base, &ctx->gallivm, type); - lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type)); - lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type)); - type.width *= 2; - lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, type); - lp_build_context_init(&ctx->soa.bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type)); - lp_build_context_init(&ctx->soa.bld_base.int64_bld, 
&ctx->gallivm, lp_int_type(type)); - - bld_base->soa = 1; - bld_base->emit_store = radeon_llvm_emit_store; - bld_base->emit_swizzle = emit_swizzle; - bld_base->emit_declaration = emit_declaration; - bld_base->emit_immediate = emit_immediate; - - bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch; - bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch; - bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch; - bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch; - bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value; - - /* metadata allowing 2.5 ULP */ - ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context, - "fpmath", 6); - LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5); - ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context, - &arg, 1); - - /* Allocate outputs */ - ctx->soa.outputs = ctx->outputs; - - bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; - bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit; - bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit; - bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit; - bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit; - bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit; - bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; - bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; -} - -void radeon_llvm_create_func(struct radeon_llvm_context *ctx, - LLVMTypeRef *return_types, unsigned num_return_elems, - LLVMTypeRef *ParamTypes, unsigned ParamCount) -{ - LLVMTypeRef main_fn_type, ret_type; - LLVMBasicBlockRef main_fn_body; - - if (num_return_elems) - ret_type = LLVMStructTypeInContext(ctx->gallivm.context, - return_types, - num_return_elems, true); - else - ret_type = LLVMVoidTypeInContext(ctx->gallivm.context); - - /* Setup the function */ - ctx->return_type = ret_type; - main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0); - 
ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type); - main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context, - ctx->main_fn, "main_body"); - LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body); -} - -void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx, - bool run_verifier) -{ - struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; - const char *triple = LLVMGetTarget(gallivm->module); - LLVMTargetLibraryInfoRef target_library_info; - - /* Create the pass manager */ - gallivm->passmgr = LLVMCreateFunctionPassManagerForModule( - gallivm->module); - - target_library_info = gallivm_create_target_library_info(triple); - LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr); - - if (run_verifier) - LLVMAddVerifierPass(gallivm->passmgr); - - /* This pass should eliminate all the load and store instructions */ - LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); - - /* Add some optimization passes */ - LLVMAddScalarReplAggregatesPass(gallivm->passmgr); - LLVMAddLICMPass(gallivm->passmgr); - LLVMAddAggressiveDCEPass(gallivm->passmgr); - LLVMAddCFGSimplificationPass(gallivm->passmgr); - LLVMAddInstructionCombiningPass(gallivm->passmgr); - - /* Run the pass */ - LLVMInitializeFunctionPassManager(gallivm->passmgr); - LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn); - LLVMFinalizeFunctionPassManager(gallivm->passmgr); - - LLVMDisposeBuilder(gallivm->builder); - LLVMDisposePassManager(gallivm->passmgr); - gallivm_dispose_target_library_info(target_library_info); -} - -void radeon_llvm_dispose(struct radeon_llvm_context *ctx) -{ - LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module); - LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context); - FREE(ctx->temp_arrays); - ctx->temp_arrays = NULL; - FREE(ctx->temp_array_allocas); - ctx->temp_array_allocas = NULL; - FREE(ctx->temps); - ctx->temps = NULL; - ctx->temps_count = 0; - FREE(ctx->flow); - ctx->flow = NULL; - 
ctx->flow_depth_max = 0; -} diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index a16f19c7ffb..727a9cccfc2 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -17,6 +17,7 @@ C_SOURCES := \ si_shader.h \ si_shader_internal.h \ si_shader_tgsi_alu.c \ + si_shader_tgsi_setup.c \ si_state.c \ si_state_draw.c \ si_state_shaders.c \ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5a3f1014cf2..79243753647 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -23,9 +23,9 @@ #include "si_pipe.h" #include "si_public.h" +#include "si_shader_internal.h" #include "sid.h" -#include "radeon/radeon_llvm_emit.h" #include "radeon/radeon_uvd.h" #include "util/u_memory.h" #include "util/u_suballoc.h" diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 37a812a2840..194da3194f8 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -33,9 +33,7 @@ #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_misc.h" -#include "radeon/radeon_llvm.h" #include "radeon/radeon_elf_util.h" -#include "radeon/radeon_llvm_emit.h" #include "util/u_memory.h" #include "util/u_string.h" #include "tgsi/tgsi_build.h" diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 44dd5fdc3b5..b46cc1c379d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -27,6 +27,138 @@ #include "si_shader.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_tgsi.h" +#include "tgsi/tgsi_parse.h" + +#include +#include + +struct pipe_debug_callback; +struct radeon_shader_binary; + +#define RADEON_LLVM_MAX_INPUT_SLOTS 32 +#define 
RADEON_LLVM_MAX_INPUTS 32 * 4 +#define RADEON_LLVM_MAX_OUTPUTS 32 * 4 + +#define RADEON_LLVM_INITIAL_CF_DEPTH 4 + +#define RADEON_LLVM_MAX_SYSTEM_VALUES 4 + +struct radeon_llvm_flow; + +struct radeon_llvm_context { + struct lp_build_tgsi_soa_context soa; + + /*=== Front end configuration ===*/ + + /* Instructions that are not described by any of the TGSI opcodes. */ + + /** This function is responsible for initializing the inputs array and will be + * called once for each input declared in the TGSI shader. + */ + void (*load_input)(struct radeon_llvm_context *, + unsigned input_index, + const struct tgsi_full_declaration *decl, + LLVMValueRef out[4]); + + void (*load_system_value)(struct radeon_llvm_context *, + unsigned index, + const struct tgsi_full_declaration *decl); + + void (*declare_memory_region)(struct radeon_llvm_context *, + const struct tgsi_full_declaration *decl); + + /** This array contains the input values for the shader. Typically these + * values will be in the form of a target intrinsic that will inform the + * backend how to load the actual inputs to the shader. + */ + struct tgsi_full_declaration input_decls[RADEON_LLVM_MAX_INPUT_SLOTS]; + LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS]; + LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS]; + + /** This pointer is used to contain the temporary values. + * The amount of temporary used in tgsi can't be bound to a max value and + * thus we must allocate this array at runtime.
+ */ + LLVMValueRef *temps; + unsigned temps_count; + LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES]; + + /*=== Private Members ===*/ + + struct radeon_llvm_flow *flow; + unsigned flow_depth; + unsigned flow_depth_max; + + struct tgsi_array_info *temp_arrays; + LLVMValueRef *temp_array_allocas; + + LLVMValueRef undef_alloca; + + LLVMValueRef main_fn; + LLVMTypeRef return_type; + + unsigned fpmath_md_kind; + LLVMValueRef fpmath_md_2p5_ulp; + + struct gallivm_state gallivm; +}; + +static inline struct radeon_llvm_context * +radeon_llvm_context(struct lp_build_tgsi_context *bld_base) +{ + return (struct radeon_llvm_context*)bld_base; +} + +void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value); +void radeon_llvm_shader_type(LLVMValueRef F, unsigned type); + +LLVMTargetRef radeon_llvm_get_r600_target(const char *triple); + +unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug); + +LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, + enum tgsi_opcode_type type); + +LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, + enum tgsi_opcode_type type, LLVMValueRef value); + +LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx, + LLVMValueRef index, + unsigned num); + +void radeon_llvm_context_init(struct radeon_llvm_context *ctx, + const char *triple, + const struct tgsi_shader_info *info, + const struct tgsi_token *tokens); + +void radeon_llvm_create_func(struct radeon_llvm_context *ctx, + LLVMTypeRef *return_types, unsigned num_return_elems, + LLVMTypeRef *ParamTypes, unsigned ParamCount); + +void radeon_llvm_dispose(struct radeon_llvm_context *ctx); + +void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx, + bool run_verifier); + +LLVMValueRef radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, + enum tgsi_opcode_type type, + LLVMValueRef ptr, + LLVMValueRef ptr2); + +LLVMValueRef 
radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base, + LLVMValueRef value); + +LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_src_register *reg, + enum tgsi_opcode_type type, + unsigned swizzle); + +void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info, + LLVMValueRef dst[4]); void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base); void si_prepare_cube_coords(struct lp_build_tgsi_context *bld_base, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 192a2cbf0ae..75ceb577b9c 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -26,7 +26,6 @@ #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_gather.h" #include "tgsi/tgsi_parse.h" -#include "radeon/radeon_llvm.h" static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c new file mode 100644 index 00000000000..e1534c717a0 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -0,0 +1,1379 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "si_shader_internal.h" +#include "radeon/radeon_elf_util.h" + +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_gather.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_misc.h" +#include "gallivm/lp_bld_swizzle.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_parse.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_debug.h" + +#include +#include + +/* Data for if/else/endif and bgnloop/endloop control flow structures. + */ +struct radeon_llvm_flow { + /* Loop exit or next part of if/else/endif. */ + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef loop_entry_block; +}; + +#define CPU_STRING_LEN 30 +#define FS_STRING_LEN 30 +#define TRIPLE_STRING_LEN 7 + +/** + * Shader types for the LLVM backend. 
+ */ +enum radeon_llvm_shader_type { + RADEON_LLVM_SHADER_PS = 0, + RADEON_LLVM_SHADER_VS = 1, + RADEON_LLVM_SHADER_GS = 2, + RADEON_LLVM_SHADER_CS = 3, +}; + +enum radeon_llvm_calling_convention { + RADEON_LLVM_AMDGPU_VS = 87, + RADEON_LLVM_AMDGPU_GS = 88, + RADEON_LLVM_AMDGPU_PS = 89, + RADEON_LLVM_AMDGPU_CS = 90, +}; + +void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value) +{ + char str[16]; + + snprintf(str, sizeof(str), "%i", value); + LLVMAddTargetDependentFunctionAttr(F, name, str); +} + +/** + * Set the shader type we want to compile + * + * @param type shader type to set + */ +void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) +{ + enum radeon_llvm_shader_type llvm_type; + enum radeon_llvm_calling_convention calling_conv; + + switch (type) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_TESS_CTRL: + case PIPE_SHADER_TESS_EVAL: + llvm_type = RADEON_LLVM_SHADER_VS; + calling_conv = RADEON_LLVM_AMDGPU_VS; + break; + case PIPE_SHADER_GEOMETRY: + llvm_type = RADEON_LLVM_SHADER_GS; + calling_conv = RADEON_LLVM_AMDGPU_GS; + break; + case PIPE_SHADER_FRAGMENT: + llvm_type = RADEON_LLVM_SHADER_PS; + calling_conv = RADEON_LLVM_AMDGPU_PS; + break; + case PIPE_SHADER_COMPUTE: + llvm_type = RADEON_LLVM_SHADER_CS; + calling_conv = RADEON_LLVM_AMDGPU_CS; + break; + default: + unreachable("Unhandle shader type"); + } + + if (HAVE_LLVM >= 0x309) + LLVMSetFunctionCallConv(F, calling_conv); + else + radeon_llvm_add_attribute(F, "ShaderType", llvm_type); +} + +static void init_r600_target() +{ + gallivm_init_llvm_targets(); +#if HAVE_LLVM < 0x0307 + LLVMInitializeR600TargetInfo(); + LLVMInitializeR600Target(); + LLVMInitializeR600TargetMC(); + LLVMInitializeR600AsmPrinter(); +#else + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTarget(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUAsmPrinter(); + +#endif +} + +static once_flag init_r600_target_once_flag = ONCE_FLAG_INIT; + +LLVMTargetRef radeon_llvm_get_r600_target(const 
char *triple) +{ + LLVMTargetRef target = NULL; + char *err_message = NULL; + + call_once(&init_r600_target_once_flag, init_r600_target); + + if (LLVMGetTargetFromTriple(triple, &target, &err_message)) { + fprintf(stderr, "Cannot find target for triple %s ", triple); + if (err_message) { + fprintf(stderr, "%s\n", err_message); + } + LLVMDisposeMessage(err_message); + return NULL; + } + return target; +} + +struct radeon_llvm_diagnostics { + struct pipe_debug_callback *debug; + unsigned retval; +}; + +static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) +{ + struct radeon_llvm_diagnostics *diag = (struct radeon_llvm_diagnostics *)context; + LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); + char *description = LLVMGetDiagInfoDescription(di); + const char *severity_str = NULL; + + switch (severity) { + case LLVMDSError: + severity_str = "error"; + break; + case LLVMDSWarning: + severity_str = "warning"; + break; + case LLVMDSRemark: + severity_str = "remark"; + break; + case LLVMDSNote: + severity_str = "note"; + break; + default: + severity_str = "unknown"; + } + + pipe_debug_message(diag->debug, SHADER_INFO, + "LLVM diagnostic (%s): %s", severity_str, description); + + if (severity == LLVMDSError) { + diag->retval = 1; + fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description); + } + + LLVMDisposeMessage(description); +} + +/** + * Compile an LLVM module to machine code. 
+ * + * @returns 0 for success, 1 for failure + */ +unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug) +{ + struct radeon_llvm_diagnostics diag; + char *err; + LLVMContextRef llvm_ctx; + LLVMMemoryBufferRef out_buffer; + unsigned buffer_size; + const char *buffer_data; + LLVMBool mem_err; + + diag.debug = debug; + diag.retval = 0; + + /* Setup Diagnostic Handler*/ + llvm_ctx = LLVMGetModuleContext(M); + + LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &diag); + + /* Compile IR*/ + mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err, + &out_buffer); + + /* Process Errors/Warnings */ + if (mem_err) { + fprintf(stderr, "%s: %s", __FUNCTION__, err); + pipe_debug_message(debug, SHADER_INFO, + "LLVM emit error: %s", err); + FREE(err); + diag.retval = 1; + goto out; + } + + /* Extract Shader Code*/ + buffer_size = LLVMGetBufferSize(out_buffer); + buffer_data = LLVMGetBufferStart(out_buffer); + + radeon_elf_read(buffer_data, buffer_size, binary); + + /* Clean up */ + LLVMDisposeMemoryBuffer(out_buffer); + +out: + if (diag.retval != 0) + pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed"); + return diag.retval; +} + +LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, + enum tgsi_opcode_type type) +{ + LLVMContextRef ctx = bld_base->base.gallivm->context; + + switch (type) { + case TGSI_TYPE_UNSIGNED: + case TGSI_TYPE_SIGNED: + return LLVMInt32TypeInContext(ctx); + case TGSI_TYPE_UNSIGNED64: + case TGSI_TYPE_SIGNED64: + return LLVMInt64TypeInContext(ctx); + case TGSI_TYPE_DOUBLE: + return LLVMDoubleTypeInContext(ctx); + case TGSI_TYPE_UNTYPED: + case TGSI_TYPE_FLOAT: + return LLVMFloatTypeInContext(ctx); + default: break; + } + return 0; +} + +LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, + enum tgsi_opcode_type type, LLVMValueRef value) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; 
+ LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type); + + if (dst_type) + return LLVMBuildBitCast(builder, value, dst_type, ""); + else + return value; +} + +/** + * Return a value that is equal to the given i32 \p index if it lies in [0,num) + * or an undefined value in the same interval otherwise. + */ +LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx, + LLVMValueRef index, + unsigned num) +{ + struct gallivm_state *gallivm = &ctx->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1); + LLVMValueRef cc; + + if (util_is_power_of_two(num)) { + index = LLVMBuildAnd(builder, index, c_max, ""); + } else { + /* In theory, this MAX pattern should result in code that is + * as good as the bit-wise AND above. + * + * In practice, LLVM generates worse code (at the time of + * writing), because its value tracking is not strong enough. + */ + cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, ""); + index = LLVMBuildSelect(builder, cc, index, c_max, ""); + } + + return index; +} + +static struct radeon_llvm_flow * +get_current_flow(struct radeon_llvm_context *ctx) +{ + if (ctx->flow_depth > 0) + return &ctx->flow[ctx->flow_depth - 1]; + return NULL; +} + +static struct radeon_llvm_flow * +get_innermost_loop(struct radeon_llvm_context *ctx) +{ + for (unsigned i = ctx->flow_depth; i > 0; --i) { + if (ctx->flow[i - 1].loop_entry_block) + return &ctx->flow[i - 1]; + } + return NULL; +} + +static struct radeon_llvm_flow * +push_flow(struct radeon_llvm_context *ctx) +{ + struct radeon_llvm_flow *flow; + + if (ctx->flow_depth >= ctx->flow_depth_max) { + unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH); + ctx->flow = REALLOC(ctx->flow, + ctx->flow_depth_max * sizeof(*ctx->flow), + new_max * sizeof(*ctx->flow)); + ctx->flow_depth_max = new_max; + } + + flow = &ctx->flow[ctx->flow_depth]; + ctx->flow_depth++; + + flow->next_block = NULL; + flow->loop_entry_block = NULL; + 
return flow; +} + +static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base, + LLVMValueRef value, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w) +{ + LLVMValueRef swizzles[4]; + LLVMTypeRef i32t = + LLVMInt32TypeInContext(bld_base->base.gallivm->context); + + swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0); + swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0); + swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0); + swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0); + + return LLVMBuildShuffleVector(bld_base->base.gallivm->builder, + value, + LLVMGetUndef(LLVMTypeOf(value)), + LLVMConstVector(swizzles, 4), ""); +} + +/** + * Return the description of the array covering the given temporary register + * index. + */ +static unsigned +get_temp_array_id(struct lp_build_tgsi_context *bld_base, + unsigned reg_index, + const struct tgsi_ind_register *reg) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + unsigned num_arrays = ctx->soa.bld_base.info->array_max[TGSI_FILE_TEMPORARY]; + unsigned i; + + if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays) + return reg->ArrayID; + + for (i = 0; i < num_arrays; i++) { + const struct tgsi_array_info *array = &ctx->temp_arrays[i]; + + if (reg_index >= array->range.First && reg_index <= array->range.Last) + return i + 1; + } + + return 0; +} + +static struct tgsi_declaration_range +get_array_range(struct lp_build_tgsi_context *bld_base, + unsigned File, unsigned reg_index, + const struct tgsi_ind_register *reg) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct tgsi_declaration_range range; + + if (File == TGSI_FILE_TEMPORARY) { + unsigned array_id = get_temp_array_id(bld_base, reg_index, reg); + if (array_id) + return ctx->temp_arrays[array_id - 1].range; + } + + range.First = 0; + range.Last = bld_base->info->file_max[File]; + return range; +} + +static LLVMValueRef +emit_array_index(struct lp_build_tgsi_soa_context *bld, + const 
struct tgsi_ind_register *reg, + unsigned offset) +{ + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + + if (!reg) { + return lp_build_const_int32(gallivm, offset); + } + LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], ""); + return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), ""); +} + +/** + * For indirect registers, construct a pointer directly to the requested + * element using getelementptr if possible. + * + * Returns NULL if the insertelement/extractelement fallback for array access + * must be used. + */ +static LLVMValueRef +get_pointer_into_array(struct radeon_llvm_context *ctx, + unsigned file, + unsigned swizzle, + unsigned reg_index, + const struct tgsi_ind_register *reg_indirect) +{ + unsigned array_id; + struct tgsi_array_info *array; + struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef idxs[2]; + LLVMValueRef index; + LLVMValueRef alloca; + + if (file != TGSI_FILE_TEMPORARY) + return NULL; + + array_id = get_temp_array_id(&ctx->soa.bld_base, reg_index, reg_indirect); + if (!array_id) + return NULL; + + alloca = ctx->temp_array_allocas[array_id - 1]; + if (!alloca) + return NULL; + + array = &ctx->temp_arrays[array_id - 1]; + + if (!(array->writemask & (1 << swizzle))) + return ctx->undef_alloca; + + index = emit_array_index(&ctx->soa, reg_indirect, + reg_index - ctx->temp_arrays[array_id - 1].range.First); + + /* Ensure that the index is within a valid range, to guard against + * VM faults and overwriting critical data (e.g. spilled resource + * descriptors). + * + * TODO It should be possible to avoid the additional instructions + * if LLVM is changed so that it guarantuees: + * 1. the scratch space descriptor isolates the current wave (this + * could even save the scratch offset SGPR at the cost of an + * additional SALU instruction) + * 2. 
the memory for allocas must be allocated at the _end_ of the + * scratch space (after spilled registers) + */ + index = radeon_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1); + + index = LLVMBuildMul( + builder, index, + lp_build_const_int32(gallivm, util_bitcount(array->writemask)), + ""); + index = LLVMBuildAdd( + builder, index, + lp_build_const_int32( + gallivm, + util_bitcount(array->writemask & ((1 << swizzle) - 1))), + ""); + idxs[0] = ctx->soa.bld_base.uint_bld.zero; + idxs[1] = index; + return LLVMBuildGEP(builder, alloca, idxs, 2, ""); +} + +LLVMValueRef +radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, + enum tgsi_opcode_type type, + LLVMValueRef ptr, + LLVMValueRef ptr2) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef result; + + result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2)); + + result = LLVMBuildInsertElement(builder, + result, + bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr), + bld_base->int_bld.zero, ""); + result = LLVMBuildInsertElement(builder, + result, + bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2), + bld_base->int_bld.one, ""); + return bitcast(bld_base, type, result); +} + +static LLVMValueRef +emit_array_fetch(struct lp_build_tgsi_context *bld_base, + unsigned File, enum tgsi_opcode_type type, + struct tgsi_declaration_range range, + unsigned swizzle) +{ + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + + unsigned i, size = range.Last - range.First + 1; + LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size); + LLVMValueRef result = LLVMGetUndef(vec); + + struct tgsi_full_src_register tmp_reg = {}; + tmp_reg.Register.File = File; + + for (i = 0; i < size; ++i) { + tmp_reg.Register.Index = i + range.First; + LLVMValueRef temp = 
radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle); + result = LLVMBuildInsertElement(builder, result, temp, + lp_build_const_int32(gallivm, i), "array_vector"); + } + return result; +} + +static LLVMValueRef +load_value_from_array(struct lp_build_tgsi_context *bld_base, + unsigned file, + enum tgsi_opcode_type type, + unsigned swizzle, + unsigned reg_index, + const struct tgsi_ind_register *reg_indirect) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ptr; + + ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect); + if (ptr) { + LLVMValueRef val = LLVMBuildLoad(builder, ptr, ""); + if (tgsi_type_is_64bit(type)) { + LLVMValueRef ptr_hi, val_hi; + ptr_hi = LLVMBuildGEP(builder, ptr, &bld_base->uint_bld.one, 1, ""); + val_hi = LLVMBuildLoad(builder, ptr_hi, ""); + val = radeon_llvm_emit_fetch_64bit(bld_base, type, val, val_hi); + } + + return val; + } else { + struct tgsi_declaration_range range = + get_array_range(bld_base, file, reg_index, reg_indirect); + LLVMValueRef index = + emit_array_index(bld, reg_indirect, reg_index - range.First); + LLVMValueRef array = + emit_array_fetch(bld_base, file, type, range, swizzle); + return LLVMBuildExtractElement(builder, array, index, ""); + } +} + +static void +store_value_to_array(struct lp_build_tgsi_context *bld_base, + LLVMValueRef value, + unsigned file, + unsigned chan_index, + unsigned reg_index, + const struct tgsi_ind_register *reg_indirect) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ptr; + + ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect); + if 
(ptr) { + LLVMBuildStore(builder, value, ptr); + } else { + unsigned i, size; + struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect); + LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - range.First); + LLVMValueRef array = + emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index); + LLVMValueRef temp_ptr; + + array = LLVMBuildInsertElement(builder, array, value, index, ""); + + size = range.Last - range.First + 1; + for (i = 0; i < size; ++i) { + switch(file) { + case TGSI_FILE_OUTPUT: + temp_ptr = bld->outputs[i + range.First][chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + if (range.First + i >= ctx->temps_count) + continue; + temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index]; + break; + + default: + continue; + } + value = LLVMBuildExtractElement(builder, array, + lp_build_const_int32(gallivm, i), ""); + LLVMBuildStore(builder, value, temp_ptr); + } + } +} + +LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_src_register *reg, + enum tgsi_opcode_type type, + unsigned swizzle) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef result = NULL, ptr, ptr2; + + if (swizzle == ~0) { + LLVMValueRef values[TGSI_NUM_CHANNELS]; + unsigned chan; + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan); + } + return lp_build_gather_values(bld_base->base.gallivm, values, + TGSI_NUM_CHANNELS); + } + + if (reg->Register.Indirect) { + LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type, + swizzle, reg->Register.Index, ®->Indirect); + return bitcast(bld_base, type, load); + } + + switch(reg->Register.File) { + case TGSI_FILE_IMMEDIATE: { + LLVMTypeRef ctype = tgsi2llvmtype(bld_base, 
type); + if (tgsi_type_is_64bit(type)) { + result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2)); + result = LLVMConstInsertElement(result, + bld->immediates[reg->Register.Index][swizzle], + bld_base->int_bld.zero); + result = LLVMConstInsertElement(result, + bld->immediates[reg->Register.Index][swizzle + 1], + bld_base->int_bld.one); + return LLVMConstBitCast(result, ctype); + } else { + return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); + } + } + + case TGSI_FILE_INPUT: { + unsigned index = reg->Register.Index; + LLVMValueRef input[4]; + + /* I don't think doing this for vertex shaders is beneficial. + * For those, we want to make sure the VMEM loads are executed + * only once. Fragment shaders don't care much, because + * v_interp instructions are much cheaper than VMEM loads. + */ + if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT) + ctx->load_input(ctx, index, &ctx->input_decls[index], input); + else + memcpy(input, &ctx->inputs[index * 4], sizeof(input)); + + result = input[swizzle]; + + if (tgsi_type_is_64bit(type)) { + ptr = result; + ptr2 = input[swizzle + 1]; + return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2); + } + break; + } + + case TGSI_FILE_TEMPORARY: + if (reg->Register.Index >= ctx->temps_count) + return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); + ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle]; + if (tgsi_type_is_64bit(type)) { + ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1]; + return radeon_llvm_emit_fetch_64bit(bld_base, type, + LLVMBuildLoad(builder, ptr, ""), + LLVMBuildLoad(builder, ptr2, "")); + } + result = LLVMBuildLoad(builder, ptr, ""); + break; + + case TGSI_FILE_OUTPUT: + ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle); + if (tgsi_type_is_64bit(type)) { + ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1); + return 
radeon_llvm_emit_fetch_64bit(bld_base, type, + LLVMBuildLoad(builder, ptr, ""), + LLVMBuildLoad(builder, ptr2, "")); + } + result = LLVMBuildLoad(builder, ptr, ""); + break; + + default: + return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); + } + + return bitcast(bld_base, type, result); +} + +static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_src_register *reg, + enum tgsi_opcode_type type, + unsigned swizzle) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + + LLVMValueRef cval = ctx->system_values[reg->Register.Index]; + if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) { + cval = LLVMBuildExtractElement(gallivm->builder, cval, + lp_build_const_int32(gallivm, swizzle), ""); + } + return bitcast(bld_base, type, cval); +} + +static void emit_declaration(struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_declaration *decl) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + unsigned first, last, i; + switch(decl->Declaration.File) { + case TGSI_FILE_ADDRESS: + { + unsigned idx; + for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { + unsigned chan; + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + ctx->soa.addr[idx][chan] = lp_build_alloca_undef( + &ctx->gallivm, + ctx->soa.bld_base.uint_bld.elem_type, ""); + } + } + break; + } + + case TGSI_FILE_TEMPORARY: + { + char name[16] = ""; + LLVMValueRef array_alloca = NULL; + unsigned decl_size; + unsigned writemask = decl->Declaration.UsageMask; + first = decl->Range.First; + last = decl->Range.Last; + decl_size = 4 * ((last - first) + 1); + + if (decl->Declaration.Array) { + unsigned id = decl->Array.ArrayID - 1; + unsigned array_size; + + writemask &= ctx->temp_arrays[id].writemask; + ctx->temp_arrays[id].writemask = writemask; + array_size = ((last - first) + 1) * 
util_bitcount(writemask); + + /* If the array has more than 16 elements, store it + * in memory using an alloca that spans the entire + * array. + * + * Otherwise, store each array element individually. + * We will then generate vectors (per-channel, up to + * <16 x float> if the usagemask is a single bit) for + * indirect addressing. + * + * Note that 16 is the number of vector elements that + * LLVM will store in a register, so theoretically an + * array with up to 4 * 16 = 64 elements could be + * handled this way, but whether that's a good idea + * depends on VGPR register pressure elsewhere. + * + * FIXME: We shouldn't need to have the non-alloca + * code path for arrays. LLVM should be smart enough to + * promote allocas into registers when profitable. + * + * LLVM 3.8 crashes with this. + */ + if (HAVE_LLVM >= 0x0309 && array_size > 16) { + array_alloca = LLVMBuildAlloca(builder, + LLVMArrayType(bld_base->base.vec_type, + array_size), "array"); + ctx->temp_array_allocas[id] = array_alloca; + } + } + + if (!ctx->temps_count) { + ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1; + ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef)); + } + if (!array_alloca) { + for (i = 0; i < decl_size; ++i) { +#ifdef DEBUG + snprintf(name, sizeof(name), "TEMP%d.%c", + first + i / 4, "xyzw"[i % 4]); +#endif + ctx->temps[first * TGSI_NUM_CHANNELS + i] = + lp_build_alloca_undef(bld_base->base.gallivm, + bld_base->base.vec_type, + name); + } + } else { + LLVMValueRef idxs[2] = { + bld_base->uint_bld.zero, + NULL + }; + unsigned j = 0; + + if (writemask != TGSI_WRITEMASK_XYZW && + !ctx->undef_alloca) { + /* Create a dummy alloca. We use it so that we + * have a pointer that is safe to load from if + * a shader ever reads from a channel that + * it never writes to. 
+ */ + ctx->undef_alloca = lp_build_alloca_undef( + bld_base->base.gallivm, + bld_base->base.vec_type, "undef"); + } + + for (i = 0; i < decl_size; ++i) { + LLVMValueRef ptr; + if (writemask & (1 << (i % 4))) { +#ifdef DEBUG + snprintf(name, sizeof(name), "TEMP%d.%c", + first + i / 4, "xyzw"[i % 4]); +#endif + idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j); + ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name); + j++; + } else { + ptr = ctx->undef_alloca; + } + ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr; + } + } + break; + } + case TGSI_FILE_INPUT: + { + unsigned idx; + for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { + if (ctx->load_input && + ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) { + ctx->input_decls[idx] = *decl; + + if (bld_base->info->processor != PIPE_SHADER_FRAGMENT) + ctx->load_input(ctx, idx, decl, + &ctx->inputs[idx * 4]); + } + } + } + break; + + case TGSI_FILE_SYSTEM_VALUE: + { + unsigned idx; + for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { + ctx->load_system_value(ctx, idx, decl); + } + } + break; + + case TGSI_FILE_OUTPUT: + { + char name[16] = ""; + unsigned idx; + for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { + unsigned chan; + assert(idx < RADEON_LLVM_MAX_OUTPUTS); + if (ctx->soa.outputs[idx][0]) + continue; + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { +#ifdef DEBUG + snprintf(name, sizeof(name), "OUT%d.%c", + idx, "xyzw"[chan % 4]); +#endif + ctx->soa.outputs[idx][chan] = lp_build_alloca_undef( + &ctx->gallivm, + ctx->soa.bld_base.base.elem_type, name); + } + } + break; + } + + case TGSI_FILE_MEMORY: + ctx->declare_memory_region(ctx, decl); + break; + + default: + break; + } +} + +LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base, + LLVMValueRef value) +{ + struct lp_build_emit_data clamp_emit_data; + + memset(&clamp_emit_data, 0, sizeof(clamp_emit_data)); + clamp_emit_data.arg_count = 3; + clamp_emit_data.args[0] = value; + 
clamp_emit_data.args[2] = bld_base->base.one; + clamp_emit_data.args[1] = bld_base->base.zero; + + return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP, + &clamp_emit_data); +} + +void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info, + LLVMValueRef dst[4]) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + const struct tgsi_full_dst_register *reg = &inst->Dst[0]; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + LLVMValueRef temp_ptr, temp_ptr2 = NULL; + unsigned chan, chan_index; + bool is_vec_store = false; + enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); + + if (dst[0]) { + LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0])); + is_vec_store = (k == LLVMVectorTypeKind); + } + + if (is_vec_store) { + LLVMValueRef values[4] = {}; + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) { + LLVMValueRef index = lp_build_const_int32(gallivm, chan); + values[chan] = LLVMBuildExtractElement(gallivm->builder, + dst[0], index, ""); + } + bld_base->emit_store(bld_base, inst, info, values); + return; + } + + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + LLVMValueRef value = dst[chan_index]; + + if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3)) + continue; + if (inst->Instruction.Saturate) + value = radeon_llvm_saturate(bld_base, value); + + if (reg->Register.File == TGSI_FILE_ADDRESS) { + temp_ptr = bld->addr[reg->Register.Index][chan_index]; + LLVMBuildStore(builder, value, temp_ptr); + continue; + } + + if (!tgsi_type_is_64bit(dtype)) + value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); + + if (reg->Register.Indirect) { + unsigned file = reg->Register.File; + unsigned reg_index = reg->Register.Index; + store_value_to_array(bld_base, value, file, 
chan_index, + reg_index, ®->Indirect); + } else { + switch(reg->Register.File) { + case TGSI_FILE_OUTPUT: + temp_ptr = bld->outputs[reg->Register.Index][chan_index]; + if (tgsi_type_is_64bit(dtype)) + temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1]; + break; + + case TGSI_FILE_TEMPORARY: + { + if (reg->Register.Index >= ctx->temps_count) + continue; + + temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index]; + if (tgsi_type_is_64bit(dtype)) + temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1]; + + break; + } + default: + return; + } + if (!tgsi_type_is_64bit(dtype)) + LLVMBuildStore(builder, value, temp_ptr); + else { + LLVMValueRef ptr = LLVMBuildBitCast(builder, value, + LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), ""); + LLVMValueRef val2; + value = LLVMBuildExtractElement(builder, ptr, + bld_base->uint_bld.zero, ""); + val2 = LLVMBuildExtractElement(builder, ptr, + bld_base->uint_bld.one, ""); + + LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr); + LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2); + } + } + } +} + +static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc) +{ + char buf[32]; + /* Subtract 1 so that the number shown is that of the corresponding + * opcode in the TGSI dump, e.g. an if block has the same suffix as + * the instruction number of the corresponding TGSI IF. + */ + snprintf(buf, sizeof(buf), "%s%d", base, pc - 1); + LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf); +} + +/* Append a basic block at the level of the parent flow. 
+ */ +static LLVMBasicBlockRef append_basic_block(struct radeon_llvm_context *ctx, + const char *name) +{ + struct gallivm_state *gallivm = &ctx->gallivm; + + assert(ctx->flow_depth >= 1); + + if (ctx->flow_depth >= 2) { + struct radeon_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2]; + + return LLVMInsertBasicBlockInContext(gallivm->context, + flow->next_block, name); + } + + return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name); +} + +/* Emit a branch to the given default target for the current block if + * applicable -- that is, if the current block does not already contain a + * branch from a break or continue. + */ +static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target) +{ + if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder))) + LLVMBuildBr(builder, target); +} + +static void bgnloop_emit(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct radeon_llvm_flow *flow = push_flow(ctx); + flow->loop_entry_block = append_basic_block(ctx, "LOOP"); + flow->next_block = append_basic_block(ctx, "ENDLOOP"); + set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc); + LLVMBuildBr(gallivm->builder, flow->loop_entry_block); + LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block); +} + +static void brk_emit(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct radeon_llvm_flow *flow = get_innermost_loop(ctx); + + LLVMBuildBr(gallivm->builder, flow->next_block); +} + +static void cont_emit(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct 
lp_build_emit_data *emit_data) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct radeon_llvm_flow *flow = get_innermost_loop(ctx); + + LLVMBuildBr(gallivm->builder, flow->loop_entry_block); +} + +static void else_emit(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct radeon_llvm_flow *current_branch = get_current_flow(ctx); + LLVMBasicBlockRef endif_block; + + assert(!current_branch->loop_entry_block); + + endif_block = append_basic_block(ctx, "ENDIF"); + emit_default_branch(gallivm->builder, endif_block); + + LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block); + set_basicblock_name(current_branch->next_block, "else", bld_base->pc); + + current_branch->next_block = endif_block; +} + +static void endif_emit(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct radeon_llvm_flow *current_branch = get_current_flow(ctx); + + assert(!current_branch->loop_entry_block); + + emit_default_branch(gallivm->builder, current_branch->next_block); + LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block); + set_basicblock_name(current_branch->next_block, "endif", bld_base->pc); + + ctx->flow_depth--; +} + +static void endloop_emit(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct radeon_llvm_flow *current_loop = get_current_flow(ctx); + + 
assert(current_loop->loop_entry_block);
+
+	emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
+
+	LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
+	set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
+	ctx->flow_depth--;
+}
+
+/**
+ * Open a conditional region for an already-computed branch condition.
+ *
+ * Pushes a new flow entry, appends the "IF" and "ELSE" basic blocks,
+ * emits a conditional branch on \p cond between them and leaves the
+ * builder positioned at the end of the "IF" block.  The "ELSE" block is
+ * stored in the flow entry as its next_block.
+ *
+ * \param action     unused
+ * \param bld_base   TGSI build context; supplies gallivm state and the pc
+ * \param emit_data  unused
+ * \param cond       condition value handed to LLVMBuildCondBr
+ */
+static void if_cond_emit(const struct lp_build_tgsi_action *action,
+			 struct lp_build_tgsi_context *bld_base,
+			 struct lp_build_emit_data *emit_data,
+			 LLVMValueRef cond)
+{
+	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	struct radeon_llvm_flow *flow = push_flow(ctx);
+	LLVMBasicBlockRef if_block;
+
+	if_block = append_basic_block(ctx, "IF");
+	flow->next_block = append_basic_block(ctx, "ELSE");
+	set_basicblock_name(if_block, "if", bld_base->pc);
+	LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
+	LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
+}
+
+/**
+ * TGSI IF: branch on a floating-point operand.
+ *
+ * The condition is "args[0] != 0.0" built with LLVMRealUNE (unordered or
+ * not equal), so an unordered comparison also selects the "IF" block.
+ */
+static void if_emit(const struct lp_build_tgsi_action *action,
+		    struct lp_build_tgsi_context *bld_base,
+		    struct lp_build_emit_data *emit_data)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMValueRef cond;
+
+	cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
+			     emit_data->args[0],
+			     bld_base->base.zero, "");
+
+	if_cond_emit(action, bld_base, emit_data, cond);
+}
+
+/**
+ * TGSI UIF: branch on an integer operand.
+ *
+ * args[0] is bitcast to the unsigned TGSI type and compared against the
+ * integer zero constant with LLVMIntNE.
+ */
+static void uif_emit(const struct lp_build_tgsi_action *action,
+		     struct lp_build_tgsi_context *bld_base,
+		     struct lp_build_emit_data *emit_data)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMValueRef cond;
+
+	cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+	        bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
+			bld_base->int_bld.zero, "");
+
+	if_cond_emit(action, bld_base, emit_data, cond);
+}
+
+/**
+ * Record one TGSI immediate in the SoA immediates table.
+ *
+ * Each of the four channels is stored as an integer constant of the
+ * uint_bld element type built from the raw Uint bits of the immediate;
+ * num_immediates is then advanced by one.
+ *
+ * NOTE(review): no bound check on num_immediates is visible here;
+ * presumably the table is sized for every immediate a shader can
+ * declare - confirm against the declaration of soa.immediates.
+ */
+static void emit_immediate(struct lp_build_tgsi_context *bld_base,
+			   const struct tgsi_full_immediate *imm)
+{
+	unsigned i;
+	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
+
+	for (i = 0; i < 4; ++i) {
+		ctx->soa.immediates[ctx->soa.num_immediates][i] =
+				LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false);
+	}
+
+	ctx->soa.num_immediates++;
+}
+
+/**
+ * Set up \p ctx for translating one TGSI shader to LLVM IR.
+ *
+ * Creates the LLVM context, a module named "tgsi" targeting \p triple and
+ * an IR builder; initializes the 32-bit and 64-bit float/uint/int build
+ * contexts; installs the store/swizzle/declaration/immediate callbacks,
+ * the per-register-file fetch callbacks and the control-flow opcode
+ * emitters; and creates the "fpmath" metadata node carrying 2.5.
+ *
+ * \param ctx     context to initialize; its gallivm and soa members are
+ *                zeroed first
+ * \param triple  target triple set on the new module
+ * \param info    shader info stored in the build context; may be NULL, in
+ *                which case no temporary-array tables are allocated
+ * \param tokens  shader tokens scanned for temporary arrays; may be NULL
+ */
+void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple,
+			      const struct tgsi_shader_info *info,
+			      const struct tgsi_token *tokens)
+{
+	struct lp_type type;
+
+	/* Initialize the gallivm object:
+	 * We are only using the module, context, and builder fields of this struct.
+	 * This should be enough for us to be able to pass our gallivm struct to the
+	 * helper functions in the gallivm module.
+	 */
+	memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
+	memset(&ctx->soa, 0, sizeof(ctx->soa));
+	ctx->gallivm.context = LLVMContextCreate();
+	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
+						ctx->gallivm.context);
+	LLVMSetTarget(ctx->gallivm.module, triple);
+	ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
+
+	struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
+
+	bld_base->info = info;
+
+	if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
+		int size = info->array_max[TGSI_FILE_TEMPORARY];
+
+		/* NOTE(review): the CALLOC results are not checked before use
+		 * - confirm the intended out-of-memory policy. */
+		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
+		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
+
+		if (tokens)
+			tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
+					 ctx->temp_arrays);
+	}
+
+	/* Zero the descriptor first so that any member not assigned below
+	 * has a defined value before it is passed by value to the helpers. */
+	memset(&type, 0, sizeof(type));
+	type.floating = true;
+	type.fixed = false;
+	type.sign = true;
+	type.norm = false;
+	type.width = 32;
+	type.length = 1;
+
+	/* 32-bit contexts: float, unsigned and signed integer. */
+	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
+	lp_build_context_init(&bld_base->uint_bld, &ctx->gallivm, lp_uint_type(type));
+	lp_build_context_init(&bld_base->int_bld, &ctx->gallivm, lp_int_type(type));
+	/* Same three again at twice the width (64 bits). */
+	type.width *= 2;
+	lp_build_context_init(&bld_base->dbl_bld, &ctx->gallivm, type);
+	lp_build_context_init(&bld_base->uint64_bld, &ctx->gallivm, lp_uint_type(type));
+	lp_build_context_init(&bld_base->int64_bld, &ctx->gallivm, lp_int_type(type));
+
+	bld_base->soa = 1;
+	bld_base->emit_store = radeon_llvm_emit_store;
+	bld_base->emit_swizzle = emit_swizzle;
+	bld_base->emit_declaration = emit_declaration;
+	bld_base->emit_immediate = emit_immediate;
+
+	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
+
+	/* metadata allowing 2.5 ULP */
+	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
+						       "fpmath", 6); /* 6 == strlen("fpmath") */
+	LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
+	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
+						     &arg, 1);
+
+	/* Allocate outputs */
+	ctx->soa.outputs = ctx->outputs;
+
+	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
+	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
+	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
+	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
+	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
+	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
+	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
+	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
+}
+
+/**
+ * Create the "main" function in the module and start emitting into it.
+ *
+ * The return type is a packed struct of \p return_types when
+ * \p num_return_elems is non-zero and void otherwise; it is also stored
+ * in ctx->return_type.  The function is non-variadic.  A "main_body"
+ * basic block is appended and the builder is positioned at its end.
+ *
+ * \param ctx               context holding the module and builder
+ * \param return_types      element types of the returned struct
+ * \param num_return_elems  number of entries in \p return_types
+ * \param ParamTypes        parameter types of the function
+ * \param ParamCount        number of entries in \p ParamTypes
+ */
+void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
+			     LLVMTypeRef *return_types, unsigned num_return_elems,
+			     LLVMTypeRef *ParamTypes, unsigned ParamCount)
+{
+	LLVMTypeRef main_fn_type, ret_type;
+	LLVMBasicBlockRef main_fn_body;
+
+	if (num_return_elems)
+		ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
+						   return_types,
+						   num_return_elems, true);
+	else
+		ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
+
+	/* Setup the function */
+	ctx->return_type = ret_type;
+	main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
+	ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
+	main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
+			ctx->main_fn, "main_body");
+	LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
+}
+
+/**
+ * Run the function-level optimization pipeline over ctx->main_fn and
+ * release the objects that are only needed while building IR.
+ *
+ * The builder and the pass manager are disposed here, and their handles
+ * in the gallivm state are cleared so that nothing can pick up a stale
+ * handle afterwards.  The module and the LLVM context stay alive until
+ * radeon_llvm_dispose().
+ *
+ * \param ctx           context whose main function is optimized
+ * \param run_verifier  when true, a verifier pass is added ahead of the
+ *                      optimization passes
+ */
+void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx,
+				 bool run_verifier)
+{
+	struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
+	const char *triple = LLVMGetTarget(gallivm->module);
+	LLVMTargetLibraryInfoRef target_library_info;
+
+	/* Create the pass manager */
+	gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
+							gallivm->module);
+
+	target_library_info = gallivm_create_target_library_info(triple);
+	LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
+
+	if (run_verifier)
+		LLVMAddVerifierPass(gallivm->passmgr);
+
+	/* This pass should eliminate all the load and store instructions */
+	LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
+
+	/* Add some optimization passes */
+	LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
+	LLVMAddLICMPass(gallivm->passmgr);
+	LLVMAddAggressiveDCEPass(gallivm->passmgr);
+	LLVMAddCFGSimplificationPass(gallivm->passmgr);
+	LLVMAddInstructionCombiningPass(gallivm->passmgr);
+
+	/* Run the pass */
+	LLVMInitializeFunctionPassManager(gallivm->passmgr);
+	LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
+	LLVMFinalizeFunctionPassManager(gallivm->passmgr);
+
+	/* Release the build-time objects and drop the stale handles. */
+	LLVMDisposeBuilder(gallivm->builder);
+	gallivm->builder = NULL;
+	LLVMDisposePassManager(gallivm->passmgr);
+	gallivm->passmgr = NULL;
+	gallivm_dispose_target_library_info(target_library_info);
+}
+
+/**
+ * Destroy the LLVM module and context owned by \p ctx and free its
+ * temporary-array, temporary and control-flow bookkeeping.
+ *
+ * Every handle and pointer released here is reset to NULL and the
+ * associated counters to 0.
+ */
+void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
+{
+	struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
+
+	/* The module is disposed before the context it was created in. */
+	LLVMDisposeModule(gallivm->module);
+	gallivm->module = NULL;
+	LLVMContextDispose(gallivm->context);
+	gallivm->context = NULL;
+	FREE(ctx->temp_arrays);
+	ctx->temp_arrays = NULL;
+	FREE(ctx->temp_array_allocas);
+	ctx->temp_array_allocas = NULL;
+	FREE(ctx->temps);
+	ctx->temps = NULL;
+	ctx->temps_count = 0;
+	FREE(ctx->flow);
+	ctx->flow = NULL;
+	ctx->flow_depth_max = 0;
+}
-- cgit v1.2.3