summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/amd/common/ac_llvm_util.c31
-rw-r--r--src/amd/common/ac_llvm_util.h3
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c32
3 files changed, 38 insertions, 28 deletions
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index b88c4e4979f..3530bf088be 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -24,10 +24,12 @@
*/
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
#include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
#include "util/bitscan.h"
#include <llvm-c/Core.h>
#include <llvm-c/Support.h>
#include "c11/threads.h"
+#include "util/u_math.h"
#include <assert.h>
#include <stdio.h>
@@ -207,3 +209,32 @@ ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
snprintf(str, sizeof(str), "%i", value);
LLVMAddTargetDependentFunctionAttr(F, name, str);
}
+
+unsigned
+ac_count_scratch_private_memory(LLVMValueRef function)
+{
+ unsigned private_mem_vgprs = 0;
+
+ /* Process all LLVM instructions. */
+ LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
+ while (bb) {
+ LLVMValueRef next = LLVMGetFirstInstruction(bb);
+
+ while (next) {
+ LLVMValueRef inst = next;
+ next = LLVMGetNextInstruction(next);
+
+ if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
+ continue;
+
+ LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
+ /* No idea why LLVM aligns allocas to 4 elements. */
+ unsigned alignment = LLVMGetAlignment(inst);
+ unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
+ private_mem_vgprs += dw_size;
+ }
+ bb = LLVMGetNextBasicBlock(bb);
+ }
+
+ return private_mem_vgprs;
+}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 3cf385a33ed..5329bb1b702 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -105,6 +105,9 @@ ac_get_store_intr_attribs(bool writeonly_memory)
AC_FUNC_ATTR_WRITEONLY;
}
+unsigned
+ac_count_scratch_private_memory(LLVMValueRef function);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 2ae2544e3f7..2e57eca6e54 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5981,32 +5981,6 @@ static void si_optimize_vs_outputs(struct si_shader_context *ctx)
&shader->info.nr_param_exports);
}
-static void si_count_scratch_private_memory(struct si_shader_context *ctx)
-{
- ctx->shader->config.private_mem_vgprs = 0;
-
- /* Process all LLVM instructions. */
- LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(ctx->main_fn);
- while (bb) {
- LLVMValueRef next = LLVMGetFirstInstruction(bb);
-
- while (next) {
- LLVMValueRef inst = next;
- next = LLVMGetNextInstruction(next);
-
- if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
- continue;
-
- LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
- /* No idea why LLVM aligns allocas to 4 elements. */
- unsigned alignment = LLVMGetAlignment(inst);
- unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
- ctx->shader->config.private_mem_vgprs += dw_size;
- }
- bb = LLVMGetNextBasicBlock(bb);
- }
-}
-
static void si_init_exec_from_input(struct si_shader_context *ctx,
unsigned param, unsigned bitoffset)
{
@@ -6953,8 +6927,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
si_optimize_vs_outputs(&ctx);
if ((debug && debug->debug_message) ||
- si_can_dump_shader(sscreen, ctx.type))
- si_count_scratch_private_memory(&ctx);
+ si_can_dump_shader(sscreen, ctx.type)) {
+ ctx.shader->config.private_mem_vgprs =
+ ac_count_scratch_private_memory(ctx.main_fn);
+ }
/* Compile to bytecode. */
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,