diff options
author | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2016-04-17 17:23:19 -0500 |
---|---|---|
committer | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2016-04-27 10:54:05 -0500 |
commit | 836cab51c8a5355a01a8ebedf99c55ff7c75e6e8 (patch) | |
tree | c1f9d6c9d218f232f89c53f5561b64b42951124c | |
parent | e7201bd31b4c7beb1a2d0c4659477946e263d5cf (diff) |
radeonsi: emit s_waitcnt for shader memory barriers and volatile
Turns out that this is needed after all to satisfy some strengthened
coherency tests. Depends on support in LLVM, added in r267729.
v2: updated to reflect changes to the LLVM intrinsic
Reviewed-by: Marek Olšák <marek.olsak@amd.com> (v1)
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 36 |
1 files changed, 23 insertions, 13 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5e090616531..1d6dc7886ba 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2891,6 +2891,17 @@ static void emit_optimization_barrier(struct si_shader_context *ctx) LLVMBuildCall(builder, inlineasm, NULL, 0, ""); } +static void emit_waitcnt(struct si_shader_context *ctx) +{ + struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef args[1] = { + lp_build_const_int32(gallivm, 0xf70) + }; + lp_build_intrinsic(builder, "llvm.amdgcn.s.waitcnt", + ctx->voidt, args, 1, LLVMNoUnwindAttribute); +} + static void membar_emit( const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, @@ -2898,14 +2909,7 @@ static void membar_emit( { struct si_shader_context *ctx = si_shader_context(bld_base); - /* Since memoryBarrier only makes guarantees about atomics and - * coherent image accesses (which bypass TC L1), we do not need to emit - * any special cache handling here. - * - * We do have to prevent LLVM from re-ordering loads across - * the barrier though. - */ - emit_optimization_barrier(ctx); + emit_waitcnt(ctx); } static LLVMValueRef @@ -3265,7 +3269,7 @@ static void load_emit( } if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) - emit_optimization_barrier(ctx); + emit_waitcnt(ctx); if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { load_emit_buffer(ctx, emit_data); @@ -3455,6 +3459,7 @@ static void store_emit( struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { + struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; const struct tgsi_full_instruction * inst = emit_data->inst; @@ -3462,11 +3467,16 @@ static void store_emit( char intrinsic_name[32]; char coords_type[8]; - if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { - store_emit_buffer(si_shader_context(bld_base), emit_data); + if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) { + store_emit_memory(ctx, emit_data); return; - } else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) { - store_emit_memory(si_shader_context(bld_base), emit_data); + } + + if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) + emit_waitcnt(ctx); + + if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { + store_emit_buffer(ctx, emit_data); return; } |