summaryrefslogtreecommitdiffstats
path: root/src/compiler/nir/nir_lower_subgroups.c
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2018-03-10 10:05:58 -0800
committerJason Ekstrand <[email protected]>2018-03-13 13:25:15 -0700
commit3d1d7e856193f7ccc6259d21fe55993337f030c7 (patch)
treebb0823cb390ef4f03e124dfb569e769c41a1aced /src/compiler/nir/nir_lower_subgroups.c
parent8247a30838a74dcdd27cc2468bff8a3d8def640e (diff)
nir/subgroups: Add lowering for vote_ieq/vote_feq to a ballot
This is based heavily on 97f10934edf8ac, "ac/nir: Add vote_ieq/vote_feq lowering pass." from Bas Nieuwenhuizen. This version is a bit more general since it's in common code. It also properly handles NaN due to not flipping the comparison for floats. Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/compiler/nir/nir_lower_subgroups.c')
-rw-r--r--src/compiler/nir/nir_lower_subgroups.c48
1 files changed, 48 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index f18ad00c370..0d3c83b7951 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -142,6 +142,51 @@ lower_vote_eq_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
}
static nir_ssa_def *
+lower_vote_eq_to_ballot(nir_builder *b, nir_intrinsic_instr *intrin,
+ const nir_lower_subgroups_options *options)
+{ /* Lowers vote_ieq/vote_feq into read_first_invocation + compare + ballot; returns the SSA def of the final comparison. */
+ assert(intrin->src[0].is_ssa);
+ nir_ssa_def *value = intrin->src[0].ssa;
+
+ /* We have to implicitly lower to scalar: each component is handled separately and the per-component results are ANDed into all_eq. */
+ nir_ssa_def *all_eq = NULL;
+ for (unsigned i = 0; i < intrin->num_components; i++) {
+ nir_intrinsic_instr *rfi =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_read_first_invocation);
+ nir_ssa_dest_init(&rfi->instr, &rfi->dest,
+ 1, value->bit_size, NULL);
+ rfi->num_components = 1;
+ rfi->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
+ nir_builder_instr_insert(b, &rfi->instr);
+
+ nir_ssa_def *is_eq; /* channel i of value compared against the result of read_first_invocation */
+ if (intrin->intrinsic == nir_intrinsic_vote_feq) { /* float compare for vote_feq, integer compare otherwise */
+ is_eq = nir_feq(b, &rfi->dest.ssa, nir_channel(b, value, i));
+ } else {
+ is_eq = nir_ieq(b, &rfi->dest.ssa, nir_channel(b, value, i));
+ }
+
+ if (all_eq == NULL) { /* the first component seeds the accumulator */
+ all_eq = is_eq;
+ } else {
+ all_eq = nir_iand(b, all_eq, is_eq);
+ }
+ }
+ /* Ballot the negated result rather than flipping the comparison itself; per the commit message this is what keeps NaN handling correct for floats. */
+ nir_intrinsic_instr *ballot =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_ballot);
+ nir_ssa_dest_init(&ballot->instr, &ballot->dest,
+ 1, options->ballot_bit_size, NULL);
+ ballot->num_components = 1;
+ ballot->src[0] = nir_src_for_ssa(nir_inot(b, all_eq));
+ nir_builder_instr_insert(b, &ballot->instr);
+ /* The lowered vote is true exactly when the ballot of "not equal" compares equal to zero. */
+ return nir_ieq(b, &ballot->dest.ssa,
+ nir_imm_intN_t(b, 0, options->ballot_bit_size));
+}
+
+static nir_ssa_def *
lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
bool lower_to_scalar)
{
@@ -219,6 +264,9 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
if (options->lower_vote_trivial)
return nir_imm_int(b, NIR_TRUE);
+ if (options->lower_vote_eq_to_ballot)
+ return lower_vote_eq_to_ballot(b, intrin, options);
+
if (options->lower_to_scalar && intrin->num_components > 1)
return lower_vote_eq_to_scalar(b, intrin);
break;