diff options
author | Nicolai Hähnle <[email protected]> | 2017-03-31 13:04:34 +0200 |
---|---|---|
committer | Nicolai Hähnle <[email protected]> | 2017-04-05 15:29:43 +0200 |
commit | 24d4fbe226c1f5b9a216d05c25737b4b14391975 (patch) | |
tree | 248d40d5f708aab2d88bd95cff03a1ddc94c3fdf /src/gallium/drivers/radeonsi | |
parent | 5c4602f4a259dae639457554f377bc35f800083f (diff) |
radeonsi: strengthen emit_optimization_barrier
LLVM will lift inline assembly out of if-else-blocks if both paths have
the same inline assembly. Prevent this by adding an irrelevant unique
text to the assembly.
This requires the LLVM assembly parser to be initialized.
Furthermore, allow forcing subsequent computations to happen after the
optimization barrier by defining a data dependency.
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 39 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 3 |
2 files changed, 38 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 541dc7b943e..082e29111bd 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -72,6 +72,8 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, static void si_dump_shader_key(unsigned shader, struct si_shader_key *key, FILE *f); +static unsigned llvm_get_type_size(LLVMTypeRef type); + static void si_build_vs_prolog_function(struct si_shader_context *ctx, union si_shader_part_key *key); static void si_build_vs_epilog_function(struct si_shader_context *ctx, @@ -3125,14 +3127,43 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, /* Prevent optimizations (at least of memory accesses) across the current * point in the program by emitting empty inline assembly that is marked as * having side effects. + * + * Optionally, a value can be passed through the inline assembly to prevent + * LLVM from hoisting calls to ReadNone functions. */ #if 0 /* unused currently */ -static void emit_optimization_barrier(struct si_shader_context *ctx) +static void emit_optimization_barrier(struct si_shader_context *ctx, + LLVMValueRef *pvgpr) { + static int counter = 0; + LLVMBuilderRef builder = ctx->gallivm.builder; - LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); - LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false); - LLVMBuildCall(builder, inlineasm, NULL, 0, ""); + char code[16]; + + snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter)); + + if (!pvgpr) { + LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); + LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false); + LLVMBuildCall(builder, inlineasm, NULL, 0, ""); + } else { + LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false); + LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false); + LLVMValueRef vgpr = *pvgpr; + LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr); + unsigned vgpr_size = llvm_get_type_size(vgpr_type); + LLVMValueRef vgpr0; + + assert(vgpr_size % 4 == 0); + + vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), ""); + vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, ""); + vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, ""); + vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, ""); + vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, ""); + + *pvgpr = vgpr; + } } #endif diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 3b8951cd942..7218d2da114 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -126,6 +126,9 @@ static void init_amdgpu_target() LLVMInitializeAMDGPUTargetMC(); LLVMInitializeAMDGPUAsmPrinter(); + /* For inline assembly. */ + LLVMInitializeAMDGPUAsmParser(); + if (HAVE_LLVM >= 0x0400) { /* * Workaround for bug in llvm 4.0 that causes image intrinsics |