summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/compiler/nir/nir.h 2
-rw-r--r-- src/compiler/nir/nir_opt_intrinsics.c 18
-rw-r--r-- src/intel/compiler/brw_compiler.c 1
3 files changed, 21 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 5ddab57776f..78684fd50c8 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1843,6 +1843,8 @@ typedef struct nir_shader_compiler_options {
*/
bool use_interpolated_input_intrinsics;
+ unsigned max_subgroup_size;
+
unsigned max_unroll_iterations;
} nir_shader_compiler_options;
diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c
index 4f36166510b..f12dc8779cb 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -62,6 +62,24 @@ opt_intrinsics_impl(nir_function_impl *impl)
replacement = nir_imm_int(&b, NIR_TRUE);
break;
}
+ case nir_intrinsic_ballot: {
+ assert(b.shader->options->max_subgroup_size != 0);
+ if (b.shader->options->max_subgroup_size > 32 ||
+ intrin->dest.ssa.bit_size <= 32)
+ continue;
+
+ nir_intrinsic_instr *ballot =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot);
+ nir_ssa_dest_init(&ballot->instr, &ballot->dest, 1, 32, NULL);
+ nir_src_copy(&ballot->src[0], &intrin->src[0], ballot);
+
+ nir_builder_instr_insert(&b, &ballot->instr);
+
+ replacement = nir_pack_64_2x32_split(&b,
+ &ballot->dest.ssa,
+ nir_imm_int(&b, 0));
+ break;
+ }
case nir_intrinsic_load_subgroup_eq_mask:
case nir_intrinsic_load_subgroup_ge_mask:
case nir_intrinsic_load_subgroup_gt_mask:
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 39a8237ff07..e86ab0fc687 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -58,6 +58,7 @@ static const struct nir_shader_compiler_options scalar_nir_options = {
.lower_unpack_unorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
.lower_subgroup_masks = true,
+ .max_subgroup_size = 64, /* FIXME */
.max_unroll_iterations = 32,
};