author     Jason Ekstrand <[email protected]>   2018-11-15 10:25:46 -0600
committer  Jason Ekstrand <[email protected]>   2018-11-19 09:57:41 -0600
commit     060817b2fa50bd71dc6a9ece605238ba11fc67e9 (patch)
tree       1b59504420384fa2ebf84ab6c76898db4ef79ca9
parent     486091bc00ea06f996910885bad080ce7f591c59 (diff)
intel,nir: Move gl_LocalInvocationID lowering to nir_lower_system_values
It's not at all Intel-specific; the formula is dictated by OpenGL and
Vulkan. The only Intel-specific thing is that we need the lowering. As a
nice side-effect, the new version is variable-group-size ready.

Reviewed-by: Plamena Manolova <[email protected]>
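For readers who want the mapping spelled out, here is a minimal standalone sketch of the formula the message refers to. The plain-C form, the helper name local_id_from_index, and the example numbers are illustrative assumptions, not part of the patch; the pass itself builds the equivalent NIR umod/udiv/imul ops, as shown in the diff below.

#include <inttypes.h>
#include <stdio.h>

/* Sketch of the OpenGL/Vulkan-mandated mapping from gl_LocalInvocationIndex
 * back to gl_LocalInvocationID for a local workgroup size of
 * (size_x, size_y, size_z).  The trailing "% size_z" is omitted, matching
 * the comment in the patch: it only matters if the index is out of range.
 */
static void
local_id_from_index(uint32_t index, uint32_t size_x, uint32_t size_y,
                    uint32_t id[3])
{
   id[0] = index % size_x;
   id[1] = (index / size_x) % size_y;
   id[2] = index / (size_x * size_y);
}

int
main(void)
{
   /* Example: an 8x4x2 workgroup; index 27 maps to ID (3, 3, 0). */
   uint32_t id[3];
   local_id_from_index(27, 8, 4, id);
   printf("(%" PRIu32 ", %" PRIu32 ", %" PRIu32 ")\n", id[0], id[1], id[2]);
   return 0;
}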
-rw-r--r--  src/compiler/nir/nir.h                            |  1
-rw-r--r--  src/compiler/nir/nir_lower_system_values.c        | 49
-rw-r--r--  src/intel/compiler/brw_compiler.c                 |  1
-rw-r--r--  src/intel/compiler/brw_nir_lower_cs_intrinsics.c  | 33
4 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index b0cff50eaf2..1dd605010f6 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2178,6 +2178,7 @@ typedef struct nir_shader_compiler_options {
bool lower_helper_invocation;
bool lower_cs_local_index_from_id;
+ bool lower_cs_local_id_from_index;
bool lower_device_index_to_zero;
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c
index fbc40573579..08a9e8be44a 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -51,6 +51,45 @@ build_local_group_size(nir_builder *b)
return local_size;
}
+static nir_ssa_def *
+build_local_invocation_id(nir_builder *b)
+{
+ if (b->shader->options->lower_cs_local_id_from_index) {
+ /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based
+ * on this formula:
+ *
+ * gl_LocalInvocationID.x =
+ * gl_LocalInvocationIndex % gl_WorkGroupSize.x;
+ * gl_LocalInvocationID.y =
+ * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
+ * gl_WorkGroupSize.y;
+ * gl_LocalInvocationID.z =
+ * (gl_LocalInvocationIndex /
+ * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
+ * gl_WorkGroupSize.z;
+ *
+ * However, the final % gl_WorkGroupSize.z does nothing unless we
+ * accidentally end up with a gl_LocalInvocationIndex that is too
+ * large, so it can safely be omitted.
+ */
+ nir_ssa_def *local_index = nir_load_local_invocation_index(b);
+ nir_ssa_def *local_size = build_local_group_size(b);
+
+ nir_ssa_def *id_x, *id_y, *id_z;
+ id_x = nir_umod(b, local_index,
+ nir_channel(b, local_size, 0));
+ id_y = nir_umod(b, nir_udiv(b, local_index,
+ nir_channel(b, local_size, 0)),
+ nir_channel(b, local_size, 1));
+ id_z = nir_udiv(b, local_index,
+ nir_imul(b, nir_channel(b, local_size, 0),
+ nir_channel(b, local_size, 1)));
+ return nir_vec3(b, id_x, id_y, id_z);
+ } else {
+ return nir_load_local_invocation_id(b);
+ }
+}
+
static bool
convert_block(nir_block *block, nir_builder *b)
{
@@ -91,7 +130,7 @@ convert_block(nir_block *block, nir_builder *b)
*/
nir_ssa_def *group_size = build_local_group_size(b);
nir_ssa_def *group_id = nir_load_work_group_id(b);
- nir_ssa_def *local_id = nir_load_local_invocation_id(b);
+ nir_ssa_def *local_id = build_local_invocation_id(b);
sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);
break;
@@ -126,6 +165,14 @@ convert_block(nir_block *block, nir_builder *b)
break;
}
+ case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
+ /* If lower_cs_local_id_from_index is true, then we derive the local
+ * ID from the local index.
+ */
+ if (b->shader->options->lower_cs_local_id_from_index)
+ sysval = build_local_invocation_id(b);
+ break;
+
case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {
sysval = build_local_group_size(b);
break;
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index e863b08b991..fe632c5badc 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -42,6 +42,7 @@
.lower_fdiv = true, \
.lower_flrp64 = true, \
.lower_ldexp = true, \
+ .lower_cs_local_id_from_index = true, \
.lower_device_index_to_zero = true, \
.native_integers = true, \
.use_interpolated_input_intrinsics = true, \
diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
index bfbdea0e8fa..fab5edc893f 100644
--- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
+++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
@@ -70,39 +70,6 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
break;
}
- case nir_intrinsic_load_local_invocation_id: {
- /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based
- * on this formula:
- *
- * gl_LocalInvocationID.x =
- * gl_LocalInvocationIndex % gl_WorkGroupSize.x;
- * gl_LocalInvocationID.y =
- * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
- * gl_WorkGroupSize.y;
- * gl_LocalInvocationID.z =
- * (gl_LocalInvocationIndex /
- * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
- * gl_WorkGroupSize.z;
- */
- unsigned *size = nir->info.cs.local_size;
-
- nir_ssa_def *local_index = nir_load_local_invocation_index(b);
-
- nir_const_value uvec3;
- memset(&uvec3, 0, sizeof(uvec3));
- uvec3.u32[0] = 1;
- uvec3.u32[1] = size[0];
- uvec3.u32[2] = size[0] * size[1];
- nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);
- uvec3.u32[0] = size[0];
- uvec3.u32[1] = size[1];
- uvec3.u32[2] = size[2];
- nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);
-
- sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val);
- break;
- }
-
case nir_intrinsic_load_subgroup_id:
if (state->local_workgroup_size > 8)
continue;