diff options
-rw-r--r-- | src/amd/common/ac_llvm_util.c | 31 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_util.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 32 |
3 files changed, 38 insertions, 28 deletions
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index b88c4e4979f..3530bf088be 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -24,10 +24,12 @@ */ /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ #include "ac_llvm_util.h" +#include "ac_llvm_build.h" #include "util/bitscan.h" #include <llvm-c/Core.h> #include <llvm-c/Support.h> #include "c11/threads.h" +#include "util/u_math.h" #include <assert.h> #include <stdio.h> @@ -207,3 +209,32 @@ ac_llvm_add_target_dep_function_attr(LLVMValueRef F, snprintf(str, sizeof(str), "%i", value); LLVMAddTargetDependentFunctionAttr(F, name, str); } + +unsigned +ac_count_scratch_private_memory(LLVMValueRef function) +{ + unsigned private_mem_vgprs = 0; + + /* Process all LLVM instructions. */ + LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function); + while (bb) { + LLVMValueRef next = LLVMGetFirstInstruction(bb); + + while (next) { + LLVMValueRef inst = next; + next = LLVMGetNextInstruction(next); + + if (LLVMGetInstructionOpcode(inst) != LLVMAlloca) + continue; + + LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); + /* No idea why LLVM aligns allocas to 4 elements. */ + unsigned alignment = LLVMGetAlignment(inst); + unsigned dw_size = align(ac_get_type_size(type) / 4, alignment); + private_mem_vgprs += dw_size; + } + bb = LLVMGetNextBasicBlock(bb); + } + + return private_mem_vgprs; +} diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 3cf385a33ed..5329bb1b702 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -105,6 +105,9 @@ ac_get_store_intr_attribs(bool writeonly_memory) AC_FUNC_ATTR_WRITEONLY; } +unsigned +ac_count_scratch_private_memory(LLVMValueRef function); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 2ae2544e3f7..2e57eca6e54 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5981,32 +5981,6 @@ static void si_optimize_vs_outputs(struct si_shader_context *ctx) &shader->info.nr_param_exports); } -static void si_count_scratch_private_memory(struct si_shader_context *ctx) -{ - ctx->shader->config.private_mem_vgprs = 0; - - /* Process all LLVM instructions. */ - LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(ctx->main_fn); - while (bb) { - LLVMValueRef next = LLVMGetFirstInstruction(bb); - - while (next) { - LLVMValueRef inst = next; - next = LLVMGetNextInstruction(next); - - if (LLVMGetInstructionOpcode(inst) != LLVMAlloca) - continue; - - LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); - /* No idea why LLVM aligns allocas to 4 elements. */ - unsigned alignment = LLVMGetAlignment(inst); - unsigned dw_size = align(ac_get_type_size(type) / 4, alignment); - ctx->shader->config.private_mem_vgprs += dw_size; - } - bb = LLVMGetNextBasicBlock(bb); - } -} - static void si_init_exec_from_input(struct si_shader_context *ctx, unsigned param, unsigned bitoffset) { @@ -6953,8 +6927,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, si_optimize_vs_outputs(&ctx); if ((debug && debug->debug_message) || - si_can_dump_shader(sscreen, ctx.type)) - si_count_scratch_private_memory(&ctx); + si_can_dump_shader(sscreen, ctx.type)) { + ctx.shader->config.private_mem_vgprs = + ac_count_scratch_private_memory(ctx.main_fn); + } /* Compile to bytecode. */ r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm, |