summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNicolai Hähnle <nicolai.haehnle@amd.com>2016-04-17 17:23:19 -0500
committerNicolai Hähnle <nicolai.haehnle@amd.com>2016-04-27 10:54:05 -0500
commit836cab51c8a5355a01a8ebedf99c55ff7c75e6e8 (patch)
treec1f9d6c9d218f232f89c53f5561b64b42951124c
parente7201bd31b4c7beb1a2d0c4659477946e263d5cf (diff)
radeonsi: emit s_waitcnt for shader memory barriers and volatile
Turns out that this is needed after all to satisfy some strengthened coherency tests. Depends on support in LLVM, added in r267729. v2: updated to reflect changes to the LLVM intrinsic Reviewed-by: Marek Olšák <marek.olsak@amd.com> (v1)
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c36
1 files changed, 23 insertions, 13 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 5e090616531..1d6dc7886ba 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2891,6 +2891,17 @@ static void emit_optimization_barrier(struct si_shader_context *ctx)
LLVMBuildCall(builder, inlineasm, NULL, 0, "");
}
+static void emit_waitcnt(struct si_shader_context *ctx)
+{
+ struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef args[1] = {
+ lp_build_const_int32(gallivm, 0xf70)
+ };
+ lp_build_intrinsic(builder, "llvm.amdgcn.s.waitcnt",
+ ctx->voidt, args, 1, LLVMNoUnwindAttribute);
+}
+
static void membar_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
@@ -2898,14 +2909,7 @@ static void membar_emit(
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- /* Since memoryBarrier only makes guarantees about atomics and
- * coherent image accesses (which bypass TC L1), we do not need to emit
- * any special cache handling here.
- *
- * We do have to prevent LLVM from re-ordering loads across
- * the barrier though.
- */
- emit_optimization_barrier(ctx);
+ emit_waitcnt(ctx);
}
static LLVMValueRef
@@ -3265,7 +3269,7 @@ static void load_emit(
}
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
- emit_optimization_barrier(ctx);
+ emit_waitcnt(ctx);
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
load_emit_buffer(ctx, emit_data);
@@ -3455,6 +3459,7 @@ static void store_emit(
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
@@ -3462,11 +3467,16 @@ static void store_emit(
char intrinsic_name[32];
char coords_type[8];
- if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
- store_emit_buffer(si_shader_context(bld_base), emit_data);
+ if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
+ store_emit_memory(ctx, emit_data);
return;
- } else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
- store_emit_memory(si_shader_context(bld_base), emit_data);
+ }
+
+ if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
+ emit_waitcnt(ctx);
+
+ if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
+ store_emit_buffer(ctx, emit_data);
return;
}