summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeonsi
diff options
context:
space:
mode:
author Nicolai Hähnle <[email protected]> 2016-03-10 18:12:44 -0500
committer Nicolai Hähnle <[email protected]> 2016-03-21 15:34:25 -0500
commit d6fa650454db5e3308a5c3618e4586a2c8f537cb (patch)
tree b50f5bc4f65fc873ef433f396f7d78086afd85f8 /src/gallium/drivers/radeonsi
parent f7a85a8a0aae692303601c5359ba8e76d78e1c28 (diff)
radeonsi: Lower TGSI_OPCODE_MEMBAR down to LLVM op
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 31
1 file changed, 31 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b487a3f6d13..ca90178f075 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2713,6 +2713,35 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data);
+/* Prevent optimizations (at least of memory accesses) across the current
+ * point in the program: using ctx's builder, emit a call to an empty
+ * inline-assembly value that is marked as having side effects.
+ */
+static void emit_optimization_barrier(struct si_shader_context *ctx)
+{
+ LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
+ LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); /* no params, not variadic */
+ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false); /* true: has side effects */
+ LLVMBuildCall(builder, inlineasm, NULL, 0, ""); /* called with no arguments */
+}
+
+static void membar_emit(
+ const struct lp_build_tgsi_action *action, /* not read by this handler */
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data) /* not read by this handler */
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+
+ /* Emit handler for TGSI_OPCODE_MEMBAR. Since memoryBarrier only makes
+ * guarantees about atomics and coherent image accesses (which bypass
+ * TC L1), we do not need to emit any special cache handling here.
+ *
+ * We do have to prevent LLVM from re-ordering loads across the
+ * barrier though, so an optimization barrier is all that is emitted.
+ */
+ emit_optimization_barrier(ctx);
+}
+
static bool tgsi_is_array_sampler(unsigned target)
{
return target == TGSI_TEXTURE_1D_ARRAY ||
@@ -5315,6 +5344,8 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl;
bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax";
+ bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
+
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;