summaryrefslogtreecommitdiffstats
path: root/src/compiler/nir/nir_split_vars.c
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2018-07-24 12:33:46 -0700
committerJason Ekstrand <[email protected]>2018-08-23 21:44:14 -0500
commitfa6417495c7096d337965345a0f2ad8a18115b19 (patch)
tree1c6daa7e30a52999dd4835ce2b5d117cc1be1fed /src/compiler/nir/nir_split_vars.c
parent26eb077ec486d0daf5a72d20bb5c03509592dce4 (diff)
nir: Add an array splitting pass
This pass looks for array variables where at least one level of the array is never indirected and splits it into multiple smaller variables. This pass doesn't really do much now because nir_lower_vars_to_ssa can already see through arrays of arrays and can detect indirects on just one level or even see that arr[i][0][5] does not alias arr[i][1][j]. This pass exists to help other passes more easily see through arrays of arrays. If a back-end does implement arrays using scratch or indirects on registers, having more smaller arrays is likely to have better memory efficiency. v2 (Jason Ekstrand): - Better comments and naming (some from Caio) - Rework to use one hash map instead of two v2.1 (Jason Ekstrand): - Fix a couple of bugs that were added in the rework including one which basically prevented it from running Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
Diffstat (limited to 'src/compiler/nir/nir_split_vars.c')
-rw-r--r--src/compiler/nir/nir_split_vars.c583
1 files changed, 583 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_split_vars.c b/src/compiler/nir/nir_split_vars.c
index 96b0452538f..788ca83b5e5 100644
--- a/src/compiler/nir/nir_split_vars.c
+++ b/src/compiler/nir/nir_split_vars.c
@@ -24,6 +24,7 @@
#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
+#include "nir_vla.h"
struct split_var_state {
void *mem_ctx;
@@ -57,6 +58,23 @@ wrap_type_in_array(const struct glsl_type *type,
return glsl_array_type(elem_type, glsl_get_length(array_type));
}
+static int
+num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
+{
+ int num_levels = 0;
+ while (true) {
+ if (glsl_type_is_array_or_matrix(type)) {
+ num_levels++;
+ type = glsl_get_array_element(type);
+ } else if (glsl_type_is_vector_or_scalar(type)) {
+ return num_levels;
+ } else {
+ /* Not an array of vectors */
+ return -1;
+ }
+ }
+}
+
static void
init_field_for_type(struct field *field, struct field *parent,
const struct glsl_type *type,
@@ -273,3 +291,568 @@ nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
return progress;
}
+
+struct array_level_info {
+ unsigned array_len;
+ bool split;
+};
+
+struct array_split {
+ /* Only set if this is the tail end of the splitting */
+ nir_variable *var;
+
+ unsigned num_splits;
+ struct array_split *splits;
+};
+
+struct array_var_info {
+ nir_variable *base_var;
+
+ const struct glsl_type *split_var_type;
+
+ bool split_var;
+ struct array_split root_split;
+
+ unsigned num_levels;
+ struct array_level_info levels[0];
+};
+
+static bool
+init_var_list_array_infos(struct exec_list *vars,
+ struct hash_table *var_info_map,
+ void *mem_ctx)
+{
+ bool has_array = false;
+
+ nir_foreach_variable(var, vars) {
+ int num_levels = num_array_levels_in_array_of_vector_type(var->type);
+ if (num_levels <= 0)
+ continue;
+
+ struct array_var_info *info =
+ rzalloc_size(mem_ctx, sizeof(*info) +
+ num_levels * sizeof(info->levels[0]));
+
+ info->base_var = var;
+ info->num_levels = num_levels;
+
+ const struct glsl_type *type = var->type;
+ for (int i = 0; i < num_levels; i++) {
+ info->levels[i].array_len = glsl_get_length(type);
+ type = glsl_get_array_element(type);
+
+ /* All levels start out initially as split */
+ info->levels[i].split = true;
+ }
+
+ _mesa_hash_table_insert(var_info_map, var, info);
+ has_array = true;
+ }
+
+ return has_array;
+}
+
+static struct array_var_info *
+get_array_var_info(nir_variable *var,
+ struct hash_table *var_info_map)
+{
+ struct hash_entry *entry =
+ _mesa_hash_table_search(var_info_map, var);
+ return entry ? entry->data : NULL;
+}
+
+static struct array_var_info *
+get_array_deref_info(nir_deref_instr *deref,
+ struct hash_table *var_info_map,
+ nir_variable_mode modes)
+{
+ if (!(deref->mode & modes))
+ return NULL;
+
+ return get_array_var_info(nir_deref_instr_get_variable(deref),
+ var_info_map);
+}
+
+static void
+mark_array_deref_used(nir_deref_instr *deref,
+ struct hash_table *var_info_map,
+ nir_variable_mode modes,
+ void *mem_ctx)
+{
+ struct array_var_info *info =
+ get_array_deref_info(deref, var_info_map, modes);
+ if (!info)
+ return;
+
+ nir_deref_path path;
+ nir_deref_path_init(&path, deref, mem_ctx);
+
+ /* Walk the path and look for indirects. If we have an array deref with an
+ * indirect, mark the given level as not being split.
+ */
+ for (unsigned i = 0; i < info->num_levels; i++) {
+ nir_deref_instr *p = path.path[i + 1];
+ if (p->deref_type == nir_deref_type_array &&
+ nir_src_as_const_value(p->arr.index) == NULL)
+ info->levels[i].split = false;
+ }
+}
+
+static void
+mark_array_usage_impl(nir_function_impl *impl,
+ struct hash_table *var_info_map,
+ nir_variable_mode modes,
+ void *mem_ctx)
+{
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_copy_deref:
+ mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
+ var_info_map, modes, mem_ctx);
+ /* Fall Through */
+
+ case nir_intrinsic_load_deref:
+ case nir_intrinsic_store_deref:
+ mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
+ var_info_map, modes, mem_ctx);
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+}
+
+static void
+create_split_array_vars(struct array_var_info *var_info,
+ unsigned level,
+ struct array_split *split,
+ const char *name,
+ nir_shader *shader,
+ nir_function_impl *impl,
+ void *mem_ctx)
+{
+ while (level < var_info->num_levels && !var_info->levels[level].split) {
+ name = ralloc_asprintf(mem_ctx, "%s[*]", name);
+ level++;
+ }
+
+ if (level == var_info->num_levels) {
+ /* We add parens to the variable name so it looks like "(foo[2][*])" so
+ * that further derefs will look like "(foo[2][*])[ssa_6]"
+ */
+ name = ralloc_asprintf(mem_ctx, "(%s)", name);
+
+ nir_variable_mode mode = var_info->base_var->data.mode;
+ if (mode == nir_var_local) {
+ split->var = nir_local_variable_create(impl,
+ var_info->split_var_type, name);
+ } else {
+ split->var = nir_variable_create(shader, mode,
+ var_info->split_var_type, name);
+ }
+ } else {
+ assert(var_info->levels[level].split);
+ split->num_splits = var_info->levels[level].array_len;
+ split->splits = rzalloc_array(mem_ctx, struct array_split,
+ split->num_splits);
+ for (unsigned i = 0; i < split->num_splits; i++) {
+ create_split_array_vars(var_info, level + 1, &split->splits[i],
+ ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
+ shader, impl, mem_ctx);
+ }
+ }
+}
+
+static bool
+split_var_list_arrays(nir_shader *shader,
+ nir_function_impl *impl,
+ struct exec_list *vars,
+ struct hash_table *var_info_map,
+ void *mem_ctx)
+{
+ struct exec_list split_vars;
+ exec_list_make_empty(&split_vars);
+
+ nir_foreach_variable_safe(var, vars) {
+ struct array_var_info *info = get_array_var_info(var, var_info_map);
+ if (!info)
+ continue;
+
+ bool has_split = false;
+ const struct glsl_type *split_type =
+ glsl_without_array_or_matrix(var->type);
+ for (int i = info->num_levels - 1; i >= 0; i--) {
+ if (info->levels[i].split) {
+ has_split = true;
+ continue;
+ }
+
+ /* If the original type was a matrix type, we'd like to keep that so
+ * we don't convert matrices into arrays.
+ */
+ if (i == info->num_levels - 1 &&
+ glsl_type_is_matrix(glsl_without_array(var->type))) {
+ split_type = glsl_matrix_type(glsl_get_base_type(split_type),
+ glsl_get_components(split_type),
+ info->levels[i].array_len);
+ } else {
+ split_type = glsl_array_type(split_type, info->levels[i].array_len);
+ }
+ }
+
+ if (has_split) {
+ info->split_var_type = split_type;
+ /* To avoid list confusion (we'll be adding things as we split
+ * variables), pull all of the variables we plan to split off of the
+ * main variable list.
+ */
+ exec_node_remove(&var->node);
+ exec_list_push_tail(&split_vars, &var->node);
+ } else {
+ assert(split_type == var->type);
+ /* If we're not modifying this variable, delete the info so we skip
+ * it faster in later passes.
+ */
+ _mesa_hash_table_remove_key(var_info_map, var);
+ }
+ }
+
+ nir_foreach_variable(var, &split_vars) {
+ struct array_var_info *info = get_array_var_info(var, var_info_map);
+ create_split_array_vars(info, 0, &info->root_split, var->name,
+ shader, impl, mem_ctx);
+ }
+
+ return !exec_list_is_empty(&split_vars);
+}
+
+static bool
+deref_has_split_wildcard(nir_deref_path *path,
+ struct array_var_info *info)
+{
+ if (info == NULL)
+ return false;
+
+ assert(path->path[0]->var == info->base_var);
+ for (unsigned i = 0; i < info->num_levels; i++) {
+ if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
+ info->levels[i].split)
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+array_path_is_out_of_bounds(nir_deref_path *path,
+ struct array_var_info *info)
+{
+ if (info == NULL)
+ return false;
+
+ assert(path->path[0]->var == info->base_var);
+ for (unsigned i = 0; i < info->num_levels; i++) {
+ nir_deref_instr *p = path->path[i + 1];
+ if (p->deref_type == nir_deref_type_array_wildcard)
+ continue;
+
+ nir_const_value *const_index = nir_src_as_const_value(p->arr.index);
+ if (const_index && const_index->u32[0] >= info->levels[i].array_len)
+ return true;
+ }
+
+ return false;
+}
+
+static void
+emit_split_copies(nir_builder *b,
+ struct array_var_info *dst_info, nir_deref_path *dst_path,
+ unsigned dst_level, nir_deref_instr *dst,
+ struct array_var_info *src_info, nir_deref_path *src_path,
+ unsigned src_level, nir_deref_instr *src)
+{
+ nir_deref_instr *dst_p, *src_p;
+
+ while ((dst_p = dst_path->path[dst_level + 1])) {
+ if (dst_p->deref_type == nir_deref_type_array_wildcard)
+ break;
+
+ dst = nir_build_deref_follower(b, dst, dst_p);
+ dst_level++;
+ }
+
+ while ((src_p = src_path->path[src_level + 1])) {
+ if (src_p->deref_type == nir_deref_type_array_wildcard)
+ break;
+
+ src = nir_build_deref_follower(b, src, src_p);
+ src_level++;
+ }
+
+ if (src_p == NULL || dst_p == NULL) {
+ assert(src_p == NULL && dst_p == NULL);
+ nir_copy_deref(b, dst, src);
+ } else {
+ assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
+ src_p->deref_type == nir_deref_type_array_wildcard);
+
+ if ((dst_info && dst_info->levels[dst_level].split) ||
+ (src_info && src_info->levels[src_level].split)) {
+ /* There are no indirects at this level on one of the source or the
+ * destination so we are lowering it.
+ */
+ assert(glsl_get_length(dst_path->path[dst_level]->type) ==
+ glsl_get_length(src_path->path[src_level]->type));
+ unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
+ for (unsigned i = 0; i < len; i++) {
+ nir_ssa_def *idx = nir_imm_int(b, i);
+ emit_split_copies(b, dst_info, dst_path, dst_level + 1,
+ nir_build_deref_array(b, dst, idx),
+ src_info, src_path, src_level + 1,
+ nir_build_deref_array(b, src, idx));
+ }
+ } else {
+ /* Neither side is being split so we just keep going */
+ emit_split_copies(b, dst_info, dst_path, dst_level + 1,
+ nir_build_deref_array_wildcard(b, dst),
+ src_info, src_path, src_level + 1,
+ nir_build_deref_array_wildcard(b, src));
+ }
+ }
+}
+
+static void
+split_array_copies_impl(nir_function_impl *impl,
+ struct hash_table *var_info_map,
+ nir_variable_mode modes,
+ void *mem_ctx)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
+ if (copy->intrinsic != nir_intrinsic_copy_deref)
+ continue;
+
+ nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
+ nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
+
+ struct array_var_info *dst_info =
+ get_array_deref_info(dst_deref, var_info_map, modes);
+ struct array_var_info *src_info =
+ get_array_deref_info(src_deref, var_info_map, modes);
+
+ if (!src_info && !dst_info)
+ continue;
+
+ nir_deref_path dst_path, src_path;
+ nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
+ nir_deref_path_init(&src_path, src_deref, mem_ctx);
+
+ if (!deref_has_split_wildcard(&dst_path, dst_info) &&
+ !deref_has_split_wildcard(&src_path, src_info))
+ continue;
+
+ b.cursor = nir_instr_remove(&copy->instr);
+
+ emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
+ src_info, &src_path, 0, src_path.path[0]);
+ }
+ }
+}
+
+static void
+split_array_access_impl(nir_function_impl *impl,
+ struct hash_table *var_info_map,
+ nir_variable_mode modes,
+ void *mem_ctx)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_deref) {
+ /* Clean up any dead derefs we find lying around. They may refer
+ * to variables we're planning to split.
+ */
+ nir_deref_instr *deref = nir_instr_as_deref(instr);
+ if (deref->mode & modes)
+ nir_deref_instr_remove_if_unused(deref);
+ continue;
+ }
+
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic != nir_intrinsic_load_deref &&
+ intrin->intrinsic != nir_intrinsic_store_deref &&
+ intrin->intrinsic != nir_intrinsic_copy_deref)
+ continue;
+
+ const unsigned num_derefs =
+ intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
+
+ for (unsigned d = 0; d < num_derefs; d++) {
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
+
+ struct array_var_info *info =
+ get_array_deref_info(deref, var_info_map, modes);
+ if (!info)
+ continue;
+
+ nir_deref_path path;
+ nir_deref_path_init(&path, deref, mem_ctx);
+
+ b.cursor = nir_before_instr(&intrin->instr);
+
+ if (array_path_is_out_of_bounds(&path, info)) {
+ /* If one of the derefs is out-of-bounds, we just delete the
+ * instruction. If a destination is out of bounds, then it may
+ * have been in-bounds prior to shrinking so we don't want to
+ * accidentally stomp something. However, we've already proven
+ * that it will never be read so it's safe to delete. If a
+ * source is out of bounds then it is loading random garbage.
+ * For loads, we replace their uses with an undef instruction
+ * and for copies we just delete the copy since it was writing
+ * undefined garbage anyway and we may as well leave the random
+ * garbage in the destination alone.
+ */
+ if (intrin->intrinsic == nir_intrinsic_load_deref) {
+ nir_ssa_def *u =
+ nir_ssa_undef(&b, intrin->dest.ssa.num_components,
+ intrin->dest.ssa.bit_size);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(u));
+ }
+ nir_instr_remove(&intrin->instr);
+ for (unsigned i = 0; i < num_derefs; i++)
+ nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
+ break;
+ }
+
+ struct array_split *split = &info->root_split;
+ for (unsigned i = 0; i < info->num_levels; i++) {
+ if (info->levels[i].split) {
+ nir_deref_instr *p = path.path[i + 1];
+ unsigned index = nir_src_as_const_value(p->arr.index)->u32[0];
+ assert(index < info->levels[i].array_len);
+ split = &split->splits[index];
+ }
+ }
+ assert(!split->splits && split->var);
+
+ nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
+ for (unsigned i = 0; i < info->num_levels; i++) {
+ if (!info->levels[i].split) {
+ new_deref = nir_build_deref_follower(&b, new_deref,
+ path.path[i + 1]);
+ }
+ }
+ assert(new_deref->type == deref->type);
+
+ /* Rewrite the deref source to point to the split one */
+ nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
+ nir_src_for_ssa(&new_deref->dest.ssa));
+ nir_deref_instr_remove_if_unused(deref);
+ }
+ }
+ }
+}
+
+/** A pass for splitting arrays of vectors into multiple variables
+ *
+ * This pass looks at arrays (possibly multiple levels) of vectors (not
+ * structures or other types) and tries to split them into piles of variables,
+ * one for each array element. The heuristic used is simple: If a given array
+ * level is never used with an indirect, that array level will get split.
+ *
+ * This pass probably could handles structures easily enough but making a pass
+ * that could see through an array of structures of arrays would be difficult
+ * so it's best to just run nir_split_struct_vars first.
+ */
+bool
+nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
+{
+ void *mem_ctx = ralloc_context(NULL);
+ struct hash_table *var_info_map =
+ _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ assert((modes & (nir_var_global | nir_var_local)) == modes);
+
+ bool has_global_array = false;
+ if (modes & nir_var_global) {
+ has_global_array = init_var_list_array_infos(&shader->globals,
+ var_info_map, mem_ctx);
+ }
+
+ bool has_any_array = false;
+ nir_foreach_function(function, shader) {
+ if (!function->impl)
+ continue;
+
+ bool has_local_array = false;
+ if (modes & nir_var_local) {
+ has_local_array = init_var_list_array_infos(&function->impl->locals,
+ var_info_map, mem_ctx);
+ }
+
+ if (has_global_array || has_local_array) {
+ has_any_array = true;
+ mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
+ }
+ }
+
+ /* If we failed to find any arrays of arrays, bail early. */
+ if (!has_any_array) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ bool has_global_splits = false;
+ if (modes & nir_var_global) {
+ has_global_splits = split_var_list_arrays(shader, NULL,
+ &shader->globals,
+ var_info_map, mem_ctx);
+ }
+
+ bool progress = false;
+ nir_foreach_function(function, shader) {
+ if (!function->impl)
+ continue;
+
+ bool has_local_splits = false;
+ if (modes & nir_var_local) {
+ has_local_splits = split_var_list_arrays(shader, function->impl,
+ &function->impl->locals,
+ var_info_map, mem_ctx);
+ }
+
+ if (has_global_splits || has_local_splits) {
+ split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
+ split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
+
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ progress = true;
+ }
+ }
+
+ ralloc_free(mem_ctx);
+
+ return progress;
+}