diff options
author | Jason Ekstrand <[email protected]> | 2020-04-22 21:35:48 -0500 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-04-24 11:37:03 +0000 |
commit | fdf9b674ee3a17c98fd266750dec3475910542f6 (patch) | |
tree | 882a3e54957b99b1d3f596726c002d7b1ceda065 /src/compiler | |
parent | 9c009da208b77496011f149fd1e289656da0f226 (diff) |
nir/lower_subgroups: Mask off unused bits in ballot ops
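Thanks to VK_EXT_subgroup_size_control, we can end up with
gl_SubgroupSize being as low as 8 on Intel. When the subgroup size is
smaller than the ballot bit size, the upper bits of a ballot value do not
correspond to any invocation, so they have to be masked off before ops
such as BitCount and FindMSB look at them.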
Fixes: d10de253097 "anv: Implement VK_EXT_subgroup_size_control"
Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4694>
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/nir/nir_lower_subgroups.c | 26 |
1 file changed, 26 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index f5eebb85144..2a7361b7472 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -446,6 +446,32 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
       assert(intrin->src[0].is_ssa);
       nir_ssa_def *int_val = ballot_type_to_uint(b, intrin->src[0].ssa,
                                                  options->ballot_bit_size);
+
+      if (intrin->intrinsic != nir_intrinsic_ballot_bitfield_extract &&
+          intrin->intrinsic != nir_intrinsic_ballot_find_lsb) {
+         /* For OpGroupNonUniformBallotFindMSB, the SPIR-V Spec says:
+          *
+          *    "Find the most significant bit set to 1 in Value, considering
+          *    only the bits in Value required to represent all bits of the
+          *    group’s invocations. If none of the considered bits is set to
+          *    1, the result is undefined."
+          *
+          * It has similar text for the other three. This means that, in case
+          * the subgroup size is less than 32, we have to mask off the unused
+          * bits. If the subgroup size is fixed and greater than or equal to
+          * 32, the mask will be 0xffffffff and nir_opt_algebraic will delete
+          * the iand.
+          *
+          * We only have to worry about this for BitCount and FindMSB because
+          * FindLSB counts from the bottom and BitfieldExtract selects
+          * individual bits. In either case, if run outside the range of
+          * valid bits, we hit the undefined results case and we can return
+          * anything we want.
+          */
+         int_val = nir_iand(b, int_val,
+            build_subgroup_mask(b, options->ballot_bit_size, options));
+      }
+
       switch (intrin->intrinsic) {
       case nir_intrinsic_ballot_bitfield_extract:
          assert(intrin->src[1].is_ssa);