diff options
-rw-r--r-- | src/amd/common/ac_exp_param.h | 40 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 156 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 6 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_helper.cpp | 18 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_util.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 152 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 13 |
8 files changed, 235 insertions, 164 deletions
diff --git a/src/amd/common/ac_exp_param.h b/src/amd/common/ac_exp_param.h new file mode 100644 index 00000000000..b97ce8154e0 --- /dev/null +++ b/src/amd/common/ac_exp_param.h @@ -0,0 +1,40 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +#ifndef AC_EXP_PARAM_H +#define AC_EXP_PARAM_H + +enum { + /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */ + AC_EXP_PARAM_OFFSET_0 = 0, + AC_EXP_PARAM_OFFSET_31 = 31, + /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */ + AC_EXP_PARAM_DEFAULT_VAL_0000 = 64, + AC_EXP_PARAM_DEFAULT_VAL_0001, + AC_EXP_PARAM_DEFAULT_VAL_1110, + AC_EXP_PARAM_DEFAULT_VAL_1111, + AC_EXP_PARAM_UNDEFINED = 255, +}; + +#endif diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index d45094c8624..9729756de4e 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -33,11 +33,13 @@ #include <stdio.h> #include "ac_llvm_util.h" - +#include "ac_exp_param.h" #include "util/bitscan.h" #include "util/macros.h" #include "sid.h" +#include "shader_enums.h" + /* Initialize module-independent parts of the context. * * The caller is responsible for initializing ctx::module and ctx::builder. @@ -1244,3 +1246,155 @@ void ac_get_image_intr_name(const char *base_name, data_type_name, coords_type_name, rsrc_type_name); } } + +#define AC_EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) +#define AC_EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) + +/* Return true if the PARAM export has been eliminated. */ +static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, + uint32_t num_outputs, + LLVMValueRef inst, unsigned offset) +{ + unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ + bool is_zero[4] = {}, is_one[4] = {}; + + for (i = 0; i < 4; i++) { + LLVMBool loses_info; + LLVMValueRef p = LLVMGetOperand(inst, AC_EXP_OUT0 + i); + + /* It's a constant expression. Undef outputs are eliminated too. */ + if (LLVMIsUndef(p)) { + is_zero[i] = true; + is_one[i] = true; + } else if (LLVMIsAConstantFP(p)) { + double a = LLVMConstRealGetDouble(p, &loses_info); + + if (a == 0) + is_zero[i] = true; + else if (a == 1) + is_one[i] = true; + else + return false; /* other constant */ + } else + return false; + } + + /* Only certain combinations of 0 and 1 can be eliminated. */ + if (is_zero[0] && is_zero[1] && is_zero[2]) + default_val = is_zero[3] ? 0 : 1; + else if (is_one[0] && is_one[1] && is_one[2]) + default_val = is_zero[3] ? 2 : 3; + else + return false; + + /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ + LLVMInstructionEraseFromParent(inst); + + /* Change OFFSET to DEFAULT_VAL. */ + for (i = 0; i < num_outputs; i++) { + if (vs_output_param_offset[i] == offset) { + vs_output_param_offset[i] = + AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val; + break; + } + } + return true; +} + +struct ac_vs_exports { + unsigned num; + unsigned offset[VARYING_SLOT_MAX]; + LLVMValueRef inst[VARYING_SLOT_MAX]; +}; + +void ac_eliminate_const_vs_outputs(struct ac_llvm_context *ctx, + LLVMValueRef main_fn, + uint8_t *vs_output_param_offset, + uint32_t num_outputs, + uint8_t *num_param_exports) +{ + LLVMBasicBlockRef bb; + bool removed_any = false; + struct ac_vs_exports exports; + + assert(num_outputs <= VARYING_SLOT_MAX); + exports.num = 0; + + /* Process all LLVM instructions. */ + bb = LLVMGetFirstBasicBlock(main_fn); + while (bb) { + LLVMValueRef inst = LLVMGetFirstInstruction(bb); + + while (inst) { + LLVMValueRef cur = inst; + inst = LLVMGetNextInstruction(inst); + + if (LLVMGetInstructionOpcode(cur) != LLVMCall) + continue; + + LLVMValueRef callee = ac_llvm_get_called_value(cur); + + if (!ac_llvm_is_function(callee)) + continue; + + const char *name = LLVMGetValueName(callee); + unsigned num_args = LLVMCountParams(callee); + + /* Check if this is an export instruction. */ + if ((num_args != 9 && num_args != 8) || + (strcmp(name, "llvm.SI.export") && + strcmp(name, "llvm.amdgcn.exp.f32"))) + continue; + + LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET); + unsigned target = LLVMConstIntGetZExtValue(arg); + + if (target < V_008DFC_SQ_EXP_PARAM) + continue; + + target -= V_008DFC_SQ_EXP_PARAM; + + /* Eliminate constant value PARAM exports. */ + if (ac_eliminate_const_output(vs_output_param_offset, + num_outputs, cur, target)) { + removed_any = true; + } else { + exports.offset[exports.num] = target; + exports.inst[exports.num] = cur; + exports.num++; + } + } + bb = LLVMGetNextBasicBlock(bb); + } + + /* Remove holes in export memory due to removed PARAM exports. + * This is done by renumbering all PARAM exports. + */ + if (removed_any) { + uint8_t current_offset[VARYING_SLOT_MAX]; + unsigned new_count = 0; + unsigned out, i; + + /* Make a copy of the offsets. We need the old version while + * we are modifying some of them. */ + memcpy(current_offset, vs_output_param_offset, + sizeof(current_offset)); + + for (i = 0; i < exports.num; i++) { + unsigned offset = exports.offset[i]; + + for (out = 0; out < num_outputs; out++) { + if (current_offset[out] != offset) + continue; + + LLVMSetOperand(exports.inst[i], AC_EXP_TARGET, + LLVMConstInt(ctx->i32, + V_008DFC_SQ_EXP_PARAM + new_count, 0)); + vs_output_param_offset[out] = new_count; + new_count++; + break; + } + } + *num_param_exports = new_count; + } +} diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index d6edcded331..1c3610a5e69 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -239,6 +239,12 @@ void ac_get_image_intr_name(const char *base_name, LLVMTypeRef coords_type, LLVMTypeRef rsrc_type, char *out_name, unsigned out_len); + +void ac_eliminate_const_vs_outputs(struct ac_llvm_context *ac, + LLVMValueRef main_fn, + uint8_t *vs_output_param_offset, + uint32_t num_outputs, + uint8_t *num_param_exports); #ifdef __cplusplus } #endif diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp index 11fa80920d6..03877d5ee1e 100644 --- a/src/amd/common/ac_llvm_helper.cpp +++ b/src/amd/common/ac_llvm_helper.cpp @@ -61,3 +61,21 @@ bool ac_is_sgpr_param(LLVMValueRef arg) return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) || AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg); } + +LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call) +{ +#if HAVE_LLVM >= 0x0309 + return LLVMGetCalledValue(call); +#else + return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue()); +#endif +} + +bool ac_llvm_is_function(LLVMValueRef v) +{ +#if HAVE_LLVM >= 0x0309 + return LLVMGetValueKind(v) == LLVMFunctionValueKind; +#else + return llvm::isa<llvm::Function>(llvm::unwrap(v)); +#endif +} diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index faecf1efd01..38e7dde5b63 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -64,6 +64,8 @@ void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask); void ac_dump_module(LLVMModuleRef module); +LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call); +bool ac_llvm_is_function(LLVMValueRef v); #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 69634b12c33..125affbaeb1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -41,6 +41,7 @@ #include "ac_binary.h" #include "ac_llvm_util.h" +#include "ac_exp_param.h" #include "si_shader_internal.h" #include "si_pipe.h" #include "sid.h" @@ -6809,76 +6810,10 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier; } -#define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) -#define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) - -/* Return true if the PARAM export has been eliminated. */ -static bool si_eliminate_const_output(struct si_shader_context *ctx, - LLVMValueRef inst, unsigned offset) -{ - struct si_shader *shader = ctx->shader; - unsigned num_outputs = shader->selector->info.num_outputs; - unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ - bool is_zero[4] = {}, is_one[4] = {}; - - for (i = 0; i < 4; i++) { - LLVMBool loses_info; - LLVMValueRef p = LLVMGetOperand(inst, EXP_OUT0 + i); - - /* It's a constant expression. Undef outputs are eliminated too. */ - if (LLVMIsUndef(p)) { - is_zero[i] = true; - is_one[i] = true; - } else if (LLVMIsAConstantFP(p)) { - double a = LLVMConstRealGetDouble(p, &loses_info); - - if (a == 0) - is_zero[i] = true; - else if (a == 1) - is_one[i] = true; - else - return false; /* other constant */ - } else - return false; - } - - /* Only certain combinations of 0 and 1 can be eliminated. */ - if (is_zero[0] && is_zero[1] && is_zero[2]) - default_val = is_zero[3] ? 0 : 1; - else if (is_one[0] && is_one[1] && is_one[2]) - default_val = is_zero[3] ? 2 : 3; - else - return false; - - /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ - LLVMInstructionEraseFromParent(inst); - - /* Change OFFSET to DEFAULT_VAL. */ - for (i = 0; i < num_outputs; i++) { - if (shader->info.vs_output_param_offset[i] == offset) { - shader->info.vs_output_param_offset[i] = - EXP_PARAM_DEFAULT_VAL_0000 + default_val; - break; - } - } - return true; -} - -struct si_vs_exports { - unsigned num; - unsigned offset[SI_MAX_VS_OUTPUTS]; - LLVMValueRef inst[SI_MAX_VS_OUTPUTS]; -}; - static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx) { struct si_shader *shader = ctx->shader; struct tgsi_shader_info *info = &shader->selector->info; - LLVMBasicBlockRef bb; - struct si_vs_exports exports; - bool removed_any = false; - - exports.num = 0; if (ctx->type == PIPE_SHADER_FRAGMENT || ctx->type == PIPE_SHADER_COMPUTE || @@ -6886,84 +6821,11 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx) shader->key.as_ls) return; - /* Process all LLVM instructions. */ - bb = LLVMGetFirstBasicBlock(ctx->main_fn); - while (bb) { - LLVMValueRef inst = LLVMGetFirstInstruction(bb); - - while (inst) { - LLVMValueRef cur = inst; - inst = LLVMGetNextInstruction(inst); - - if (LLVMGetInstructionOpcode(cur) != LLVMCall) - continue; - - LLVMValueRef callee = lp_get_called_value(cur); - - if (!lp_is_function(callee)) - continue; - - const char *name = LLVMGetValueName(callee); - unsigned num_args = LLVMCountParams(callee); - - /* Check if this is an export instruction. */ - if ((num_args != 9 && num_args != 8) || - (strcmp(name, "llvm.SI.export") && - strcmp(name, "llvm.amdgcn.exp.f32"))) - continue; - - LLVMValueRef arg = LLVMGetOperand(cur, EXP_TARGET); - unsigned target = LLVMConstIntGetZExtValue(arg); - - if (target < V_008DFC_SQ_EXP_PARAM) - continue; - - target -= V_008DFC_SQ_EXP_PARAM; - - /* Eliminate constant value PARAM exports. */ - if (si_eliminate_const_output(ctx, cur, target)) { - removed_any = true; - } else { - exports.offset[exports.num] = target; - exports.inst[exports.num] = cur; - exports.num++; - } - } - bb = LLVMGetNextBasicBlock(bb); - } - - /* Remove holes in export memory due to removed PARAM exports. - * This is done by renumbering all PARAM exports. - */ - if (removed_any) { - ubyte current_offset[SI_MAX_VS_OUTPUTS]; - unsigned new_count = 0; - unsigned out, i; - - /* Make a copy of the offsets. We need the old version while - * we are modifying some of them. */ - assert(sizeof(current_offset) == - sizeof(shader->info.vs_output_param_offset)); - memcpy(current_offset, shader->info.vs_output_param_offset, - sizeof(current_offset)); - - for (i = 0; i < exports.num; i++) { - unsigned offset = exports.offset[i]; - - for (out = 0; out < info->num_outputs; out++) { - if (current_offset[out] != offset) - continue; - - LLVMSetOperand(exports.inst[i], EXP_TARGET, - LLVMConstInt(ctx->i32, - V_008DFC_SQ_EXP_PARAM + new_count, 0)); - shader->info.vs_output_param_offset[out] = new_count; - new_count++; - break; - } - } - shader->info.nr_param_exports = new_count; - } + ac_eliminate_const_vs_outputs(&ctx->ac, + ctx->main_fn, + shader->info.vs_output_param_offset, + info->num_outputs, + &shader->info.nr_param_exports); } static void si_count_scratch_private_memory(struct si_shader_context *ctx) @@ -7537,7 +7399,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, si_init_shader_ctx(&ctx, sscreen, shader, tm); ctx.separate_prolog = !is_monolithic; - memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED, + memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(shader->info.vs_output_param_offset)); shader->info.uses_instanceid = sel->info.uses_instanceid; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 05c0e623b4a..3c01a3d3031 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -482,18 +482,6 @@ struct si_shader_config { unsigned rsrc2; }; -enum { - /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */ - EXP_PARAM_OFFSET_0 = 0, - EXP_PARAM_OFFSET_31 = 31, - /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */ - EXP_PARAM_DEFAULT_VAL_0000 = 64, - EXP_PARAM_DEFAULT_VAL_0001, - EXP_PARAM_DEFAULT_VAL_1110, - EXP_PARAM_DEFAULT_VAL_1111, - EXP_PARAM_UNDEFINED = 255, -}; - /* GCN-specific shader info. */ struct si_shader_info { ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS]; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 8afc7314918..06ea99c05c3 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -39,6 +39,7 @@ #include "util/disk_cache.h" #include "util/mesa-sha1.h" +#include "ac_exp_param.h" /* SHADER_CACHE */ @@ -1506,7 +1507,7 @@ void si_init_shader_selector_async(void *job, int thread_index) for (i = 0; i < sel->info.num_outputs; i++) { unsigned offset = shader->info.vs_output_param_offset[i]; - if (offset <= EXP_PARAM_OFFSET_31) + if (offset <= AC_EXP_PARAM_OFFSET_31) continue; unsigned name = sel->info.output_semantic_name[i]; @@ -2001,18 +2002,18 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, index == vsinfo->output_semantic_index[j]) { offset = vs->info.vs_output_param_offset[j]; - if (offset <= EXP_PARAM_OFFSET_31) { + if (offset <= AC_EXP_PARAM_OFFSET_31) { /* The input is loaded from parameter memory. */ ps_input_cntl |= S_028644_OFFSET(offset); } else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) { - if (offset == EXP_PARAM_UNDEFINED) { + if (offset == AC_EXP_PARAM_UNDEFINED) { /* This can happen with depth-only rendering. */ offset = 0; } else { /* The input is a DEFAULT_VAL constant. */ - assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 && - offset <= EXP_PARAM_DEFAULT_VAL_1111); - offset -= EXP_PARAM_DEFAULT_VAL_0000; + assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && + offset <= AC_EXP_PARAM_DEFAULT_VAL_1111); + offset -= AC_EXP_PARAM_DEFAULT_VAL_0000; } ps_input_cntl = S_028644_OFFSET(0x20) | |