summaryrefslogtreecommitdiffstats
path: root/src/intel/compiler
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2019-02-22 10:48:39 -0600
committerJason Ekstrand <[email protected]>2019-07-24 12:55:40 -0500
commitc84b8eeeac30ec033847a3e13d05d1162e78daf3 (patch)
tree2fba023f049627a457343e8664566852f6baee9f /src/intel/compiler
parent1981460af2c17d55c17be2475a28bbf711a80c79 (diff)
intel/compiler: Be more conservative about subgroup sizes in GL
The rules for gl_SubgroupSize in Vulkan require that it be a constant that can be queried through the API. However, all GL requires is that it's a uniform. Instead of always claiming that the subgroup size in the shader is 32 in GL like we have to do for Vulkan, claim 8 for geometry stages, the maximum for fragment shaders, and the actual size for compute. Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
Diffstat (limited to 'src/intel/compiler')
-rw-r--r--src/intel/compiler/brw_compiler.h9
-rw-r--r--src/intel/compiler/brw_fs.cpp6
-rw-r--r--src/intel/compiler/brw_nir.c39
-rw-r--r--src/intel/compiler/brw_nir.h1
-rw-r--r--src/intel/compiler/brw_shader.cpp2
-rw-r--r--src/intel/compiler/brw_vec4.cpp2
-rw-r--r--src/intel/compiler/brw_vec4_gs_visitor.cpp2
-rw-r--r--src/intel/compiler/brw_vec4_tcs.cpp2
8 files changed, 50 insertions, 13 deletions
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 8f8cca79deb..ba95df5b7dc 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -203,9 +203,18 @@ struct brw_sampler_prog_key_data {
float scale_factors[32];
};
+/** An enum representing what kind of input gl_SubgroupSize is. */
+enum PACKED brw_subgroup_size_type
+{
+ BRW_SUBGROUP_SIZE_API_CONSTANT, /**< Vulkan behavior */
+ BRW_SUBGROUP_SIZE_UNIFORM, /**< OpenGL behavior */
+};
+
struct brw_base_prog_key {
unsigned program_string_id;
+ enum brw_subgroup_size_type subgroup_size_type;
+
struct brw_sampler_prog_key_data tex;
};
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 30e04ddc318..4ab50aad11f 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -7967,7 +7967,9 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
{
const struct gen_device_info *devinfo = compiler->devinfo;
- brw_nir_apply_key(shader, compiler, &key->base, true);
+ unsigned max_subgroup_size = unlikely(INTEL_DEBUG & DEBUG_DO32) ? 32 : 16;
+
+ brw_nir_apply_key(shader, compiler, &key->base, max_subgroup_size, true);
brw_nir_lower_fs_inputs(shader, devinfo, key);
brw_nir_lower_fs_outputs(shader);
@@ -8228,7 +8230,7 @@ compile_cs_to_nir(const struct brw_compiler *compiler,
unsigned dispatch_width)
{
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
- brw_nir_apply_key(shader, compiler, &key->base, true);
+ brw_nir_apply_key(shader, compiler, &key->base, dispatch_width, true);
NIR_PASS_V(shader, brw_nir_lower_cs_intrinsics, dispatch_width);
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 6bac6636c47..9e4b33c8b49 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -809,13 +809,6 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
UNUSED bool progress; /* Written by OPT */
- const nir_lower_subgroups_options subgroups_options = {
- .subgroup_size = BRW_SUBGROUP_SIZE,
- .ballot_bit_size = 32,
- .lower_subgroup_masks = true,
- };
- OPT(nir_lower_subgroups, &subgroups_options);
-
OPT(brw_nir_lower_mem_access_bit_sizes);
do {
@@ -973,16 +966,48 @@ brw_nir_apply_sampler_key(nir_shader *nir,
return nir_lower_tex(nir, &tex_options);
}
+static unsigned
+get_subgroup_size(const struct brw_base_prog_key *key,
+ unsigned max_subgroup_size)
+{
+ switch (key->subgroup_size_type) {
+ case BRW_SUBGROUP_SIZE_API_CONSTANT:
+ /* We have to use the global constant size. */
+ return BRW_SUBGROUP_SIZE;
+
+ case BRW_SUBGROUP_SIZE_UNIFORM:
+ /* It has to be uniform across all invocations but can vary per stage
+ * if we want. This gives us a bit more freedom.
+ *
+ * For compute, brw_nir_apply_key is called per-dispatch-width so this
+ * is the actual subgroup size and not a maximum. However, we only
+ * invoke one size of any given compute shader so it's still guaranteed
+ * to be uniform across invocations.
+ */
+ return max_subgroup_size;
+ }
+
+ unreachable("Invalid subgroup size type");
+}
+
void
brw_nir_apply_key(nir_shader *nir,
const struct brw_compiler *compiler,
const struct brw_base_prog_key *key,
+ unsigned max_subgroup_size,
bool is_scalar)
{
bool progress = false;
OPT(brw_nir_apply_sampler_key, compiler, &key->tex);
+ const nir_lower_subgroups_options subgroups_options = {
+ .subgroup_size = get_subgroup_size(key, max_subgroup_size),
+ .ballot_bit_size = 32,
+ .lower_subgroup_masks = true,
+ };
+ OPT(nir_lower_subgroups, &subgroups_options);
+
if (progress)
brw_nir_optimize(nir, compiler, is_scalar, false);
}
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 5f1843c6280..d3130476ab1 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -140,6 +140,7 @@ void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
void brw_nir_apply_key(nir_shader *nir,
const struct brw_compiler *compiler,
const struct brw_base_prog_key *key,
+ unsigned max_subgroup_size,
bool is_scalar);
enum brw_reg_type brw_type_for_nir_type(const struct gen_device_info *devinfo,
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index 686c5c3a46d..5f86e251c33 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -1244,7 +1244,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
nir->info.inputs_read = key->inputs_read;
nir->info.patch_inputs_read = key->patch_inputs_read;
- brw_nir_apply_key(nir, compiler, &key->base, is_scalar);
+ brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
brw_nir_lower_tes_inputs(nir, input_vue_map);
brw_nir_lower_vue_outputs(nir);
brw_postprocess_nir(nir, compiler, is_scalar);
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 5f7decd5410..2014f95f9c7 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -2845,7 +2845,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
char **error_str)
{
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX];
- brw_nir_apply_key(shader, compiler, &key->base, is_scalar);
+ brw_nir_apply_key(shader, compiler, &key->base, 8, is_scalar);
const unsigned *assembly = NULL;
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index c45e284e07e..b7476afb466 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -639,7 +639,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
&c.input_vue_map, inputs_read,
shader->info.separate_shader);
- brw_nir_apply_key(shader, compiler, &key->base, is_scalar);
+ brw_nir_apply_key(shader, compiler, &key->base, 8, is_scalar);
brw_nir_lower_vue_inputs(shader, &c.input_vue_map);
brw_nir_lower_vue_outputs(shader);
brw_postprocess_nir(shader, compiler, is_scalar);
diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp
index d82439f2771..a107a9b6630 100644
--- a/src/intel/compiler/brw_vec4_tcs.cpp
+++ b/src/intel/compiler/brw_vec4_tcs.cpp
@@ -397,7 +397,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
nir->info.outputs_written,
nir->info.patch_outputs_written);
- brw_nir_apply_key(nir, compiler, &key->base, is_scalar);
+ brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
brw_nir_lower_vue_inputs(nir, &input_vue_map);
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
key->tes_primitive_mode);