summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNicolai Hähnle <[email protected]>2017-03-29 20:29:37 +0200
committerNicolai Hähnle <[email protected]>2017-03-31 07:56:27 +0200
commit02112c3ef7716955c889b6d10fb569b028a33f0e (patch)
treeafcb92b4cbc7efe79c291db4dea5905347d3d8b1
parentcd3f38606962977821913792e841da6444244baa (diff)
radeonsi: implement ARB_shader_group_vote
Reviewed-by: Marek Olšák <[email protected]>
-rw-r--r--docs/features.txt2
-rw-r--r--docs/relnotes/17.1.0.html1
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c4
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c82
4 files changed, 87 insertions, 2 deletions
diff --git a/docs/features.txt b/docs/features.txt
index d707f011850..1e145e1ddad 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -295,7 +295,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_ARB_shader_ballot not started
GL_ARB_shader_clock DONE (i965/gen7+, radeonsi)
GL_ARB_shader_draw_parameters DONE (i965, nvc0, radeonsi)
- GL_ARB_shader_group_vote DONE (nvc0)
+ GL_ARB_shader_group_vote DONE (nvc0, radeonsi)
GL_ARB_shader_stencil_export DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr)
GL_ARB_shader_viewport_layer_array DONE (i965/gen6+)
GL_ARB_sparse_buffer not started
diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
index c2a0166f72c..a11a37ffb8c 100644
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers.
<ul>
<li>GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe</li>
<li>GL_ARB_shader_clock on radeonsi</li>
+<li>GL_ARB_shader_group_vote on radeonsi</li>
<li>GL_ARB_transform_feedback2 on i965/gen6</li>
<li>GL_ARB_transform_feedback_overflow_query on i965/gen6+</li>
<li>Geometry shaders enabled on swr</li>
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 2369471aa58..8aae11d3be4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -424,6 +424,9 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_CLOCK:
return HAVE_LLVM >= 0x0309;
+ case PIPE_CAP_TGSI_VOTE:
+ return HAVE_LLVM >= 0x0400;
+
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
@@ -480,7 +483,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
- case PIPE_CAP_TGSI_VOTE:
case PIPE_CAP_MAX_WINDOW_RECTANGLES:
case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_TGSI_FS_FBFETCH:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index ec063ad9028..874535a6b77 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5125,6 +5125,84 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
}
}
+static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
+ LLVMValueRef value)
+{
+ struct gallivm_state *gallivm = &ctx->gallivm;
+ LLVMValueRef args[3] = {
+ value,
+ ctx->i32_0,
+ LLVMConstInt(ctx->i32, LLVMIntNE, 0)
+ };
+
+ if (LLVMTypeOf(value) != ctx->i32)
+ args[0] = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
+
+ return lp_build_intrinsic(gallivm->builder,
+ "llvm.amdgcn.icmp.i32",
+ ctx->i64, args, 3,
+ LP_FUNC_ATTR_NOUNWIND |
+ LP_FUNC_ATTR_READNONE |
+ LP_FUNC_ATTR_CONVERGENT);
+}
+
+static void vote_all_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = &ctx->gallivm;
+ LLVMValueRef active_set, vote_set;
+ LLVMValueRef tmp;
+
+ active_set = si_emit_ballot(ctx, ctx->i32_1);
+ vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+
+ tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+}
+
+static void vote_any_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = &ctx->gallivm;
+ LLVMValueRef vote_set;
+ LLVMValueRef tmp;
+
+ vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+
+ tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+ vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+}
+
+static void vote_eq_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = &ctx->gallivm;
+ LLVMValueRef active_set, vote_set;
+ LLVMValueRef all, none, tmp;
+
+ active_set = si_emit_ballot(ctx, ctx->i32_1);
+ vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+
+ all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
+ none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+ vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
+ tmp = LLVMBuildOr(gallivm->builder, all, none, "");
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+}
+
static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
@@ -6574,6 +6652,10 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
+ bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
+ bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
+ bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
+
bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;