/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_vla.h"

/* Needed for _mesa_bitcount() */
#include "main/macros.h"

struct split_var_state {
   void *mem_ctx;

   nir_shader *shader;
   nir_function_impl *impl;

   nir_variable *base_var;
};

struct field {
   struct field *parent;

   const struct glsl_type *type;

   unsigned num_fields;
   struct field *fields;

   nir_variable *var;
};

static const struct glsl_type *
wrap_type_in_array(const struct glsl_type *type,
                   const struct glsl_type *array_type)
{
   if (!glsl_type_is_array(array_type))
      return type;

   const struct glsl_type *elem_type =
      wrap_type_in_array(type, glsl_get_array_element(array_type));
   return glsl_array_type(elem_type, glsl_get_length(array_type));
}

static int
num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
{
   int num_levels = 0;
   while (true) {
      if (glsl_type_is_array_or_matrix(type)) {
         num_levels++;
         type = glsl_get_array_element(type);
      } else if (glsl_type_is_vector_or_scalar(type)) {
         return num_levels;
      } else {
         /* Not an array of vectors */
         return -1;
      }
   }
}

static void
init_field_for_type(struct field *field, struct field *parent,
                    const struct glsl_type *type,
                    const char *name,
                    struct split_var_state *state)
{
   *field = (struct field) {
      .parent = parent,
      .type = type,
   };

   const struct glsl_type *struct_type = glsl_without_array(type);
   if (glsl_type_is_struct(struct_type)) {
      field->num_fields = glsl_get_length(struct_type);
      field->fields = ralloc_array(state->mem_ctx, struct field,
                                   field->num_fields);
      for (unsigned i = 0; i < field->num_fields; i++) {
         char *field_name = NULL;
         if (name) {
            field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
                                         glsl_get_struct_elem_name(struct_type, i));
         } else {
            field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
                                         glsl_get_type_name(struct_type),
                                         glsl_get_struct_elem_name(struct_type, i));
         }
         init_field_for_type(&field->fields[i], field,
                             glsl_get_struct_field(struct_type, i),
                             field_name, state);
      }
   } else {
      const struct glsl_type *var_type = type;
      for (struct field *f = field->parent; f; f = f->parent)
         var_type = wrap_type_in_array(var_type, f->type);

      nir_variable_mode mode = state->base_var->data.mode;
      if (mode == nir_var_local) {
         field->var = nir_local_variable_create(state->impl, var_type, name);
      } else {
         field->var = nir_variable_create(state->shader, mode, var_type, name);
      }
   }
}
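/* For illustration (hypothetical shader, not part of the pass): given a
 * variable
 *
 *    struct { vec2 pos; float alpha; } s[4];
 *
 * init_field_for_type() walks the struct and creates one leaf variable per
 * member, re-wrapped in the enclosing array dimensions by
 * wrap_type_in_array():
 *
 *    vec2  s_pos[4];
 *    float s_alpha[4];
 */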
static bool
split_var_list_structs(nir_shader *shader,
                       nir_function_impl *impl,
                       struct exec_list *vars,
                       struct hash_table *var_field_map,
                       void *mem_ctx)
{
   struct split_var_state state = {
      .mem_ctx = mem_ctx,
      .shader = shader,
      .impl = impl,
   };

   struct exec_list split_vars;
   exec_list_make_empty(&split_vars);

   /* To avoid list confusion (we'll be adding things as we split variables),
    * pull all of the variables we plan to split off of the list
    */
   nir_foreach_variable_safe(var, vars) {
      if (!glsl_type_is_struct(glsl_without_array(var->type)))
         continue;

      exec_node_remove(&var->node);
      exec_list_push_tail(&split_vars, &var->node);
   }

   nir_foreach_variable(var, &split_vars) {
      state.base_var = var;

      struct field *root_field = ralloc(mem_ctx, struct field);
      init_field_for_type(root_field, NULL, var->type, var->name, &state);
      _mesa_hash_table_insert(var_field_map, var, root_field);
   }

   return !exec_list_is_empty(&split_vars);
}

static void
split_struct_derefs_impl(nir_function_impl *impl,
                         struct hash_table *var_field_map,
                         nir_variable_mode modes,
                         void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_deref)
            continue;

         nir_deref_instr *deref = nir_instr_as_deref(instr);
         if (!(deref->mode & modes))
            continue;

         /* Clean up any dead derefs we find lying around.  They may refer to
          * variables we're planning to split.
          */
         if (nir_deref_instr_remove_if_unused(deref))
            continue;

         if (!glsl_type_is_vector_or_scalar(deref->type))
            continue;

         nir_variable *base_var = nir_deref_instr_get_variable(deref);
         struct hash_entry *entry =
            _mesa_hash_table_search(var_field_map, base_var);
         if (!entry)
            continue;

         struct field *root_field = entry->data;

         nir_deref_path path;
         nir_deref_path_init(&path, deref, mem_ctx);

         struct field *tail_field = root_field;
         for (unsigned i = 0; path.path[i]; i++) {
            if (path.path[i]->deref_type != nir_deref_type_struct)
               continue;

            assert(i > 0);
            assert(glsl_type_is_struct(path.path[i - 1]->type));
            assert(path.path[i - 1]->type ==
                   glsl_without_array(tail_field->type));

            tail_field = &tail_field->fields[path.path[i]->strct.index];
         }
         nir_variable *split_var = tail_field->var;

         nir_deref_instr *new_deref = NULL;
         for (unsigned i = 0; path.path[i]; i++) {
            nir_deref_instr *p = path.path[i];
            b.cursor = nir_after_instr(&p->instr);

            switch (p->deref_type) {
            case nir_deref_type_var:
               assert(new_deref == NULL);
               new_deref = nir_build_deref_var(&b, split_var);
               break;

            case nir_deref_type_array:
            case nir_deref_type_array_wildcard:
               new_deref = nir_build_deref_follower(&b, new_deref, p);
               break;

            case nir_deref_type_struct:
               /* Nothing to do; we're splitting structs */
               break;

            default:
               unreachable("Invalid deref type in path");
            }
         }

         assert(new_deref->type == deref->type);
         nir_ssa_def_rewrite_uses(&deref->dest.ssa,
                                  nir_src_for_ssa(&new_deref->dest.ssa));
         nir_deref_instr_remove_if_unused(deref);
      }
   }
}
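/* A sketch of the rewrite above (hypothetical NIR, for illustration): a
 * chain like
 *
 *    %a = deref_var &s
 *    %b = deref_array &%a[%i]
 *    %c = deref_struct &%b->pos
 *
 * is rebuilt on the split leaf variable with the struct link dropped:
 *
 *    %a' = deref_var &s_pos
 *    %b' = deref_array &%a'[%i]
 */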
/** A pass for splitting structs into multiple variables
 *
 * This pass splits arrays of structs into multiple variables, one for each
 * (possibly nested) structure member.  After this pass completes, no
 * variables of the given mode will contain a struct type.
 */
bool
nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
{
   void *mem_ctx = ralloc_context(NULL);
   struct hash_table *var_field_map =
      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
                              _mesa_key_pointer_equal);

   assert((modes & (nir_var_global | nir_var_local)) == modes);

   bool has_global_splits = false;
   if (modes & nir_var_global) {
      has_global_splits = split_var_list_structs(shader, NULL,
                                                 &shader->globals,
                                                 var_field_map, mem_ctx);
   }

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_splits = false;
      if (modes & nir_var_local) {
         has_local_splits = split_var_list_structs(shader, function->impl,
                                                   &function->impl->locals,
                                                   var_field_map, mem_ctx);
      }

      if (has_global_splits || has_local_splits) {
         split_struct_derefs_impl(function->impl, var_field_map,
                                  modes, mem_ctx);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}

struct array_level_info {
   unsigned array_len;
   bool split;
};

struct array_split {
   /* Only set if this is the tail end of the splitting */
   nir_variable *var;

   unsigned num_splits;
   struct array_split *splits;
};

struct array_var_info {
   nir_variable *base_var;

   const struct glsl_type *split_var_type;

   bool split_var;
   struct array_split root_split;

   unsigned num_levels;
   struct array_level_info levels[0];
};

static bool
init_var_list_array_infos(struct exec_list *vars,
                          struct hash_table *var_info_map,
                          void *mem_ctx)
{
   bool has_array = false;

   nir_foreach_variable(var, vars) {
      int num_levels = num_array_levels_in_array_of_vector_type(var->type);
      if (num_levels <= 0)
         continue;

      struct array_var_info *info =
         rzalloc_size(mem_ctx, sizeof(*info) +
                               num_levels * sizeof(info->levels[0]));

      info->base_var = var;
      info->num_levels = num_levels;

      const struct glsl_type *type = var->type;
      for (int i = 0; i < num_levels; i++) {
         info->levels[i].array_len = glsl_get_length(type);
         type = glsl_get_array_element(type);

         /* All levels start out initially as split */
         info->levels[i].split = true;
      }

      _mesa_hash_table_insert(var_info_map, var, info);
      has_array = true;
   }

   return has_array;
}

static struct array_var_info *
get_array_var_info(nir_variable *var,
                   struct hash_table *var_info_map)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(var_info_map, var);
   return entry ? entry->data : NULL;
}

static struct array_var_info *
get_array_deref_info(nir_deref_instr *deref,
                     struct hash_table *var_info_map,
                     nir_variable_mode modes)
{
   if (!(deref->mode & modes))
      return NULL;

   return get_array_var_info(nir_deref_instr_get_variable(deref),
                             var_info_map);
}
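/* For illustration (hypothetical shader): given "vec4 arr[8][4];" and an
 * access "arr[3][i]" with a non-constant i, mark_array_deref_used() below
 * leaves levels[0].split set (constant index) but clears levels[1].split
 * because of the indirect.  Only levels that are never indexed indirectly
 * stay split.
 */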
static void
mark_array_deref_used(nir_deref_instr *deref,
                      struct hash_table *var_info_map,
                      nir_variable_mode modes,
                      void *mem_ctx)
{
   struct array_var_info *info =
      get_array_deref_info(deref, var_info_map, modes);
   if (!info)
      return;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);

   /* Walk the path and look for indirects.  If we have an array deref with an
    * indirect, mark the given level as not being split.
    */
   for (unsigned i = 0; i < info->num_levels; i++) {
      nir_deref_instr *p = path.path[i + 1];
      if (p->deref_type == nir_deref_type_array &&
          nir_src_as_const_value(p->arr.index) == NULL)
         info->levels[i].split = false;
   }
}

static void
mark_array_usage_impl(nir_function_impl *impl,
                      struct hash_table *var_info_map,
                      nir_variable_mode modes,
                      void *mem_ctx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_copy_deref:
            mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
                                  var_info_map, modes, mem_ctx);
            /* Fall Through */

         case nir_intrinsic_load_deref:
         case nir_intrinsic_store_deref:
            mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
                                  var_info_map, modes, mem_ctx);
            break;

         default:
            break;
         }
      }
   }
}

static void
create_split_array_vars(struct array_var_info *var_info,
                        unsigned level,
                        struct array_split *split,
                        const char *name,
                        nir_shader *shader,
                        nir_function_impl *impl,
                        void *mem_ctx)
{
   while (level < var_info->num_levels && !var_info->levels[level].split) {
      name = ralloc_asprintf(mem_ctx, "%s[*]", name);
      level++;
   }

   if (level == var_info->num_levels) {
      /* We add parens to the variable name so it looks like "(foo[2][*])" so
       * that further derefs will look like "(foo[2][*])[ssa_6]"
       */
      name = ralloc_asprintf(mem_ctx, "(%s)", name);

      nir_variable_mode mode = var_info->base_var->data.mode;
      if (mode == nir_var_local) {
         split->var = nir_local_variable_create(impl,
                                                var_info->split_var_type, name);
      } else {
         split->var = nir_variable_create(shader, mode,
                                          var_info->split_var_type, name);
      }
   } else {
      assert(var_info->levels[level].split);
      split->num_splits = var_info->levels[level].array_len;
      split->splits = rzalloc_array(mem_ctx, struct array_split,
                                    split->num_splits);
      for (unsigned i = 0; i < split->num_splits; i++) {
         create_split_array_vars(var_info, level + 1, &split->splits[i],
                                 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
                                 shader, impl, mem_ctx);
      }
   }
}
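/* For illustration (hypothetical variable): given "vec4 foo[3][4]" where
 * level 0 is split and level 1 is not, create_split_array_vars() produces
 * three variables of type vec4[4] named
 *
 *    (foo[0][*])  (foo[1][*])  (foo[2][*])
 *
 * so a later indirect access shows up as, e.g., "(foo[1][*])[ssa_6]".
 */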
static bool
split_var_list_arrays(nir_shader *shader,
                      nir_function_impl *impl,
                      struct exec_list *vars,
                      struct hash_table *var_info_map,
                      void *mem_ctx)
{
   struct exec_list split_vars;
   exec_list_make_empty(&split_vars);

   nir_foreach_variable_safe(var, vars) {
      struct array_var_info *info = get_array_var_info(var, var_info_map);
      if (!info)
         continue;

      bool has_split = false;
      const struct glsl_type *split_type =
         glsl_without_array_or_matrix(var->type);
      for (int i = info->num_levels - 1; i >= 0; i--) {
         if (info->levels[i].split) {
            has_split = true;
            continue;
         }

         /* If the original type was a matrix type, we'd like to keep that so
          * we don't convert matrices into arrays.
          */
         if (i == info->num_levels - 1 &&
             glsl_type_is_matrix(glsl_without_array(var->type))) {
            split_type = glsl_matrix_type(glsl_get_base_type(split_type),
                                          glsl_get_components(split_type),
                                          info->levels[i].array_len);
         } else {
            split_type = glsl_array_type(split_type, info->levels[i].array_len);
         }
      }

      if (has_split) {
         info->split_var_type = split_type;

         /* To avoid list confusion (we'll be adding things as we split
          * variables), pull all of the variables we plan to split off of the
          * main variable list.
          */
         exec_node_remove(&var->node);
         exec_list_push_tail(&split_vars, &var->node);
      } else {
         assert(split_type == var->type);
         /* If we're not modifying this variable, delete the info so we skip
          * it faster in later passes.
          */
         _mesa_hash_table_remove_key(var_info_map, var);
      }
   }

   nir_foreach_variable(var, &split_vars) {
      struct array_var_info *info = get_array_var_info(var, var_info_map);
      create_split_array_vars(info, 0, &info->root_split, var->name,
                              shader, impl, mem_ctx);
   }

   return !exec_list_is_empty(&split_vars);
}

static bool
deref_has_split_wildcard(nir_deref_path *path,
                         struct array_var_info *info)
{
   if (info == NULL)
      return false;

   assert(path->path[0]->var == info->base_var);
   for (unsigned i = 0; i < info->num_levels; i++) {
      if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
          info->levels[i].split)
         return true;
   }

   return false;
}

static bool
array_path_is_out_of_bounds(nir_deref_path *path,
                            struct array_var_info *info)
{
   if (info == NULL)
      return false;

   assert(path->path[0]->var == info->base_var);
   for (unsigned i = 0; i < info->num_levels; i++) {
      nir_deref_instr *p = path->path[i + 1];
      if (p->deref_type == nir_deref_type_array_wildcard)
         continue;

      nir_const_value *const_index = nir_src_as_const_value(p->arr.index);
      if (const_index && const_index->u32[0] >= info->levels[i].array_len)
         return true;
   }

   return false;
}
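/* For illustration: a constant index that is >= the level's array_len
 * (e.g. "arr[5]" on a four-element level) makes
 * array_path_is_out_of_bounds() return true; split_array_access_impl()
 * below then deletes the access or, for loads, replaces its uses with
 * undef.
 */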
static void
emit_split_copies(nir_builder *b,
                  struct array_var_info *dst_info, nir_deref_path *dst_path,
                  unsigned dst_level, nir_deref_instr *dst,
                  struct array_var_info *src_info, nir_deref_path *src_path,
                  unsigned src_level, nir_deref_instr *src)
{
   nir_deref_instr *dst_p, *src_p;

   while ((dst_p = dst_path->path[dst_level + 1])) {
      if (dst_p->deref_type == nir_deref_type_array_wildcard)
         break;

      dst = nir_build_deref_follower(b, dst, dst_p);
      dst_level++;
   }

   while ((src_p = src_path->path[src_level + 1])) {
      if (src_p->deref_type == nir_deref_type_array_wildcard)
         break;

      src = nir_build_deref_follower(b, src, src_p);
      src_level++;
   }

   if (src_p == NULL || dst_p == NULL) {
      assert(src_p == NULL && dst_p == NULL);
      nir_copy_deref(b, dst, src);
   } else {
      assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
             src_p->deref_type == nir_deref_type_array_wildcard);

      if ((dst_info && dst_info->levels[dst_level].split) ||
          (src_info && src_info->levels[src_level].split)) {
         /* At least one of the source or the destination has no indirects
          * at this level (it is being split), so we have to lower the
          * wildcard to per-element copies.
          */
         assert(glsl_get_length(dst_path->path[dst_level]->type) ==
                glsl_get_length(src_path->path[src_level]->type));
         unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
         for (unsigned i = 0; i < len; i++) {
            nir_ssa_def *idx = nir_imm_int(b, i);
            emit_split_copies(b, dst_info, dst_path, dst_level + 1,
                              nir_build_deref_array(b, dst, idx),
                              src_info, src_path, src_level + 1,
                              nir_build_deref_array(b, src, idx));
         }
      } else {
         /* Neither side is being split so we just keep going */
         emit_split_copies(b, dst_info, dst_path, dst_level + 1,
                           nir_build_deref_array_wildcard(b, dst),
                           src_info, src_path, src_level + 1,
                           nir_build_deref_array_wildcard(b, src));
      }
   }
}

static void
split_array_copies_impl(nir_function_impl *impl,
                        struct hash_table *var_info_map,
                        nir_variable_mode modes,
                        void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
         if (copy->intrinsic != nir_intrinsic_copy_deref)
            continue;

         nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
         nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);

         struct array_var_info *dst_info =
            get_array_deref_info(dst_deref, var_info_map, modes);
         struct array_var_info *src_info =
            get_array_deref_info(src_deref, var_info_map, modes);

         if (!src_info && !dst_info)
            continue;

         nir_deref_path dst_path, src_path;
         nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
         nir_deref_path_init(&src_path, src_deref, mem_ctx);

         if (!deref_has_split_wildcard(&dst_path, dst_info) &&
             !deref_has_split_wildcard(&src_path, src_info))
            continue;

         b.cursor = nir_instr_remove(&copy->instr);

         emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
                               src_info, &src_path, 0, src_path.path[0]);
      }
   }
}
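/* A sketch of the expansion (hypothetical NIR): with "vec4 a[2], b[2]"
 * where a's only level is split, the copy
 *
 *    copy_deref &a[*], &b[*]
 *
 * is replaced by constant-index copies that later rewriting can retarget
 * at the split variables:
 *
 *    copy_deref &a[0], &b[0]
 *    copy_deref &a[1], &b[1]
 */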
static void
split_array_access_impl(nir_function_impl *impl,
                        struct hash_table *var_info_map,
                        nir_variable_mode modes,
                        void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref) {
            /* Clean up any dead derefs we find lying around.  They may refer
             * to variables we're planning to split.
             */
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->mode & modes)
               nir_deref_instr_remove_if_unused(deref);
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_deref &&
             intrin->intrinsic != nir_intrinsic_store_deref &&
             intrin->intrinsic != nir_intrinsic_copy_deref)
            continue;

         const unsigned num_derefs =
            intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;

         for (unsigned d = 0; d < num_derefs; d++) {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);

            struct array_var_info *info =
               get_array_deref_info(deref, var_info_map, modes);
            if (!info)
               continue;

            nir_deref_path path;
            nir_deref_path_init(&path, deref, mem_ctx);

            b.cursor = nir_before_instr(&intrin->instr);

            if (array_path_is_out_of_bounds(&path, info)) {
               /* If one of the derefs is out-of-bounds, we just delete the
                * instruction.  If a destination is out of bounds, then it may
                * have been in-bounds prior to shrinking so we don't want to
                * accidentally stomp something.  However, we've already proven
                * that it will never be read so it's safe to delete.  If a
                * source is out of bounds then it is loading random garbage.
                * For loads, we replace their uses with an undef instruction
                * and for copies we just delete the copy since it was writing
                * undefined garbage anyway and we may as well leave the random
                * garbage in the destination alone.
                */
               if (intrin->intrinsic == nir_intrinsic_load_deref) {
                  nir_ssa_def *u =
                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
                                       intrin->dest.ssa.bit_size);
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                           nir_src_for_ssa(u));
               }
               nir_instr_remove(&intrin->instr);
               for (unsigned i = 0; i < num_derefs; i++)
                  nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
               break;
            }

            struct array_split *split = &info->root_split;
            for (unsigned i = 0; i < info->num_levels; i++) {
               if (info->levels[i].split) {
                  nir_deref_instr *p = path.path[i + 1];
                  unsigned index = nir_src_as_const_value(p->arr.index)->u32[0];
                  assert(index < info->levels[i].array_len);
                  split = &split->splits[index];
               }
            }
            assert(!split->splits && split->var);

            nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
            for (unsigned i = 0; i < info->num_levels; i++) {
               if (!info->levels[i].split) {
                  new_deref = nir_build_deref_follower(&b, new_deref,
                                                       path.path[i + 1]);
               }
            }
            assert(new_deref->type == deref->type);

            /* Rewrite the deref source to point to the split one */
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
                                  nir_src_for_ssa(&new_deref->dest.ssa));
            nir_deref_instr_remove_if_unused(deref);
         }
      }
   }
}
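/* A sketch of the rewrite above (hypothetical NIR): for "vec4 foo[3][4]"
 * with level 0 split, a load through &foo[1][%i] walks root_split down to
 * the variable "(foo[1][*])" and rebuilds only the non-split levels:
 *
 *    load_deref &foo[1][%i]   becomes   load_deref &(foo[1][*])[%i]
 */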
/** A pass for splitting arrays of vectors into multiple variables
 *
 * This pass looks at arrays (possibly multiple levels) of vectors (not
 * structures or other types) and tries to split them into piles of variables,
 * one for each array element.  The heuristic used is simple: If a given array
 * level is never used with an indirect, that array level will get split.
 *
 * This pass could probably handle structures easily enough but making a pass
 * that could see through an array of structures of arrays would be difficult
 * so it's best to just run nir_split_struct_vars first.
 */
bool
nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
{
   void *mem_ctx = ralloc_context(NULL);
   struct hash_table *var_info_map =
      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
                              _mesa_key_pointer_equal);

   assert((modes & (nir_var_global | nir_var_local)) == modes);

   bool has_global_array = false;
   if (modes & nir_var_global) {
      has_global_array = init_var_list_array_infos(&shader->globals,
                                                   var_info_map, mem_ctx);
   }

   bool has_any_array = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_array = false;
      if (modes & nir_var_local) {
         has_local_array = init_var_list_array_infos(&function->impl->locals,
                                                     var_info_map, mem_ctx);
      }

      if (has_global_array || has_local_array) {
         has_any_array = true;
         mark_array_usage_impl(function->impl, var_info_map,
                               modes, mem_ctx);
      }
   }

   /* If we failed to find any arrays of vectors, bail early. */
   if (!has_any_array) {
      ralloc_free(mem_ctx);
      return false;
   }

   bool has_global_splits = false;
   if (modes & nir_var_global) {
      has_global_splits = split_var_list_arrays(shader, NULL,
                                                &shader->globals,
                                                var_info_map, mem_ctx);
   }

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_splits = false;
      if (modes & nir_var_local) {
         has_local_splits = split_var_list_arrays(shader, function->impl,
                                                  &function->impl->locals,
                                                  var_info_map, mem_ctx);
      }

      if (has_global_splits || has_local_splits) {
         split_array_copies_impl(function->impl, var_info_map,
                                 modes, mem_ctx);

         split_array_access_impl(function->impl, var_info_map,
                                 modes, mem_ctx);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}

struct array_level_usage {
   unsigned array_len;

   /* The value UINT_MAX will be used to indicate an indirect */
   unsigned max_read;
   unsigned max_written;

   /* True if there is a copy that isn't to/from a shrinkable array */
   bool has_external_copy;
   struct set *levels_copied;
};

struct vec_var_usage {
   /* Convenience set of all components this variable has */
   nir_component_mask_t all_comps;

   nir_component_mask_t comps_read;
   nir_component_mask_t comps_written;

   nir_component_mask_t comps_kept;

   /* True if there is a copy that isn't to/from a shrinkable vector */
   bool has_external_copy;
   struct set *vars_copied;

   unsigned num_levels;
   struct array_level_usage levels[0];
};
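/* For illustration (hypothetical usage): for "vec4 color[2]" where the
 * shader only reads .xy and only writes .xyz of the elements, the tracking
 * below ends up with
 *
 *    all_comps     = 0xf
 *    comps_read    = 0x3
 *    comps_written = 0x7
 *
 * and shrink_vec_var_list() keeps comps_read & comps_written = 0x3, so the
 * elements shrink to vec2.
 */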
static struct vec_var_usage *
get_vec_var_usage(nir_variable *var,
                  struct hash_table *var_usage_map,
                  bool add_usage_entry, void *mem_ctx)
{
   struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
   if (entry)
      return entry->data;

   if (!add_usage_entry)
      return NULL;

   /* Check to make sure that we are working with an array of vectors.  We
    * don't bother to shrink single vectors because we figure that we can
    * clean it up better with SSA than by inserting piles of vecN instructions
    * to compact results.
    */
   int num_levels = num_array_levels_in_array_of_vector_type(var->type);
   if (num_levels < 1)
      return NULL; /* Not an array of vectors */

   struct vec_var_usage *usage =
      rzalloc_size(mem_ctx, sizeof(*usage) +
                            num_levels * sizeof(usage->levels[0]));

   usage->num_levels = num_levels;
   const struct glsl_type *type = var->type;
   for (unsigned i = 0; i < num_levels; i++) {
      usage->levels[i].array_len = glsl_get_length(type);
      type = glsl_get_array_element(type);
   }
   assert(glsl_type_is_vector_or_scalar(type));

   usage->all_comps = (1 << glsl_get_components(type)) - 1;

   _mesa_hash_table_insert(var_usage_map, var, usage);

   return usage;
}

static struct vec_var_usage *
get_vec_deref_usage(nir_deref_instr *deref,
                    struct hash_table *var_usage_map,
                    nir_variable_mode modes,
                    bool add_usage_entry, void *mem_ctx)
{
   if (!(deref->mode & modes))
      return NULL;

   return get_vec_var_usage(nir_deref_instr_get_variable(deref),
                            var_usage_map, add_usage_entry, mem_ctx);
}

static void
mark_deref_used(nir_deref_instr *deref,
                nir_component_mask_t comps_read,
                nir_component_mask_t comps_written,
                nir_deref_instr *copy_deref,
                struct hash_table *var_usage_map,
                nir_variable_mode modes,
                void *mem_ctx)
{
   if (!(deref->mode & modes))
      return;

   nir_variable *var = nir_deref_instr_get_variable(deref);

   struct vec_var_usage *usage =
      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
   if (!usage)
      return;

   usage->comps_read |= comps_read & usage->all_comps;
   usage->comps_written |= comps_written & usage->all_comps;

   struct vec_var_usage *copy_usage = NULL;
   if (copy_deref) {
      copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
                                       true, mem_ctx);
      if (copy_usage) {
         if (usage->vars_copied == NULL) {
            usage->vars_copied = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal);
         }
         _mesa_set_add(usage->vars_copied, copy_usage);
      } else {
         usage->has_external_copy = true;
      }
   }

   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);

   nir_deref_path copy_path;
   if (copy_usage)
      nir_deref_path_init(&copy_path, copy_deref, mem_ctx);

   unsigned copy_i = 0;
   for (unsigned i = 0; i < usage->num_levels; i++) {
      struct array_level_usage *level = &usage->levels[i];
      nir_deref_instr *deref = path.path[i + 1];
      assert(deref->deref_type == nir_deref_type_array ||
             deref->deref_type == nir_deref_type_array_wildcard);

      unsigned max_used;
      if (deref->deref_type == nir_deref_type_array) {
         nir_const_value *const_index =
            nir_src_as_const_value(deref->arr.index);
         max_used = const_index ? const_index->u32[0] : UINT_MAX;
      } else {
         /* For wildcards, we read or wrote the whole thing. */
         assert(deref->deref_type == nir_deref_type_array_wildcard);
         max_used = level->array_len - 1;

         if (copy_usage) {
            /* Match each wildcard level with the level on copy_usage */
            for (; copy_path.path[copy_i + 1]; copy_i++) {
               if (copy_path.path[copy_i + 1]->deref_type ==
                   nir_deref_type_array_wildcard)
                  break;
            }

            struct array_level_usage *copy_level =
               &copy_usage->levels[copy_i++];

            if (level->levels_copied == NULL) {
               level->levels_copied =
                  _mesa_set_create(mem_ctx, _mesa_hash_pointer,
                                   _mesa_key_pointer_equal);
            }
            _mesa_set_add(level->levels_copied, copy_level);
         } else {
            /* We have a wildcard and it comes from a variable we aren't
             * tracking; flag it and we'll know to not shorten this array.
             */
            level->has_external_copy = true;
         }
      }

      if (comps_written)
         level->max_written = MAX2(level->max_written, max_used);
      if (comps_read)
         level->max_read = MAX2(level->max_read, max_used);
   }
}
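/* For illustration (hypothetical copy): for
 *
 *    copy_deref &a[*], &b[2][*]
 *
 * the matching loop above pairs a's wildcard level 0 with b's wildcard
 * level 1 (skipping b's constant-indexed level), so the two levels are
 * linked in levels_copied and later forced to the same array_len.
 */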
static bool
src_is_load_deref(nir_src src, nir_src deref_src)
{
   assert(src.is_ssa);
   assert(deref_src.is_ssa);

   if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(src.ssa->parent_instr);
   if (load->intrinsic != nir_intrinsic_load_deref)
      return false;

   assert(load->src[0].is_ssa);

   return load->src[0].ssa == deref_src.ssa;
}

/* Returns all non-self-referential components of a store instruction.  A
 * component is self-referential if it comes from the same component of a load
 * instruction on the same deref.  If the only data in a particular component
 * of a variable came directly from that component then it's undefined.  The
 * only way to get defined data into a component of a variable is for it to
 * get written there by something outside or from a different component.
 *
 * This is a fairly common pattern in shaders that come from either GLSL IR or
 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
 * load-vec-store.
 */
static nir_component_mask_t
get_non_self_referential_store_comps(nir_intrinsic_instr *store)
{
   nir_component_mask_t comps = nir_intrinsic_write_mask(store);

   assert(store->src[1].is_ssa);
   nir_instr *src_instr = store->src[1].ssa->parent_instr;
   if (src_instr->type != nir_instr_type_alu)
      return comps;

   nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);

   if (src_alu->op == nir_op_imov ||
       src_alu->op == nir_op_fmov) {
      /* If it's just a swizzle of a load from the same deref, discount any
       * channels that don't move in the swizzle.
       */
      if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
            if (src_alu->src[0].swizzle[i] == i)
               comps &= ~(1u << i);
         }
      }
   } else if (src_alu->op == nir_op_vec2 ||
              src_alu->op == nir_op_vec3 ||
              src_alu->op == nir_op_vec4) {
      /* If it's a vec, discount any channels that are just loads from the
       * same deref put in the same spot.
       */
      for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
         if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
             src_alu->src[i].swizzle[0] == i)
            comps &= ~(1u << i);
      }
   }

   return comps;
}

static void
find_used_components_impl(nir_function_impl *impl,
                          struct hash_table *var_usage_map,
                          nir_variable_mode modes,
                          void *mem_ctx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            nir_ssa_def_components_read(&intrin->dest.ssa),
                            0, NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_store_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            0, get_non_self_referential_store_comps(intrin),
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_copy_deref: {
            /* Just mark everything used for copies. */
            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
            break;
         }

         default:
            break;
         }
      }
   }
}
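/* For illustration (hypothetical GLSL): "v.y = x;" on a vec4 typically
 * reaches NIR as
 *
 *    %old = load_deref &v
 *    %new = vec4 %old.x, %x, %old.z, %old.w
 *            store_deref &v, %new
 *
 * get_non_self_referential_store_comps() discounts .x, .z and .w because
 * they are just copied back from the same deref, so only .y counts as
 * actually written.
 */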
static bool
shrink_vec_var_list(struct exec_list *vars,
                    struct hash_table *var_usage_map)
{
   /* Initialize the components kept field of each variable.  This is the
    * AND of the components written and components read.  If a component is
    * written but never read, it's dead.  If it is read but never written,
    * then all values read are undefined garbage and we may as well not read
    * them.
    *
    * The same logic applies to the array length.  We make the array length
    * the minimum length required between read and write and plan to discard
    * any OOB access.  The one exception here is indirect writes because we
    * don't know where they will land and we can't shrink an array with
    * indirect writes because previously in-bounds writes may become
    * out-of-bounds and have undefined behavior.
    *
    * Also, if we have a copy to/from something we can't shrink, we need to
    * leave components and array_len of any wildcards alone.
    */
   nir_foreach_variable(var, vars) {
      struct vec_var_usage *usage =
         get_vec_var_usage(var, var_usage_map, false, NULL);
      if (!usage)
         continue;

      assert(usage->comps_kept == 0);
      if (usage->has_external_copy)
         usage->comps_kept = usage->all_comps;
      else
         usage->comps_kept = usage->comps_read & usage->comps_written;

      for (unsigned i = 0; i < usage->num_levels; i++) {
         struct array_level_usage *level = &usage->levels[i];
         assert(level->array_len > 0);

         if (level->max_written == UINT_MAX || level->has_external_copy)
            continue; /* Can't shrink */

         unsigned max_used = MIN2(level->max_read, level->max_written);
         level->array_len = MIN2(max_used, level->array_len - 1) + 1;
      }
   }

   /* In order for variable copies to work, we have to have the same data type
    * on the source and the destination.  In order to satisfy this, we run a
    * little fixed-point algorithm to transitively ensure that we get enough
    * components and array elements for this to hold for all copies.
    */
   bool fp_progress;
   do {
      fp_progress = false;
      nir_foreach_variable(var, vars) {
         struct vec_var_usage *var_usage =
            get_vec_var_usage(var, var_usage_map, false, NULL);
         if (!var_usage || !var_usage->vars_copied)
            continue;

         struct set_entry *copy_entry;
         set_foreach(var_usage->vars_copied, copy_entry) {
            struct vec_var_usage *copy_usage = (void *)copy_entry->key;
            if (copy_usage->comps_kept != var_usage->comps_kept) {
               nir_component_mask_t comps_kept =
                  (var_usage->comps_kept | copy_usage->comps_kept);
               var_usage->comps_kept = comps_kept;
               copy_usage->comps_kept = comps_kept;
               fp_progress = true;
            }
         }

         for (unsigned i = 0; i < var_usage->num_levels; i++) {
            struct array_level_usage *var_level = &var_usage->levels[i];
            if (!var_level->levels_copied)
               continue;

            set_foreach(var_level->levels_copied, copy_entry) {
               struct array_level_usage *copy_level = (void *)copy_entry->key;
               if (var_level->array_len != copy_level->array_len) {
                  unsigned array_len =
                     MAX2(var_level->array_len, copy_level->array_len);
                  var_level->array_len = array_len;
                  copy_level->array_len = array_len;
                  fp_progress = true;
               }
            }
         }
      }
   } while (fp_progress);

   bool vars_shrunk = false;
   nir_foreach_variable_safe(var, vars) {
      struct vec_var_usage *usage =
         get_vec_var_usage(var, var_usage_map, false, NULL);
      if (!usage)
         continue;

      bool shrunk = false;
      const struct glsl_type *vec_type = var->type;
      for (unsigned i = 0; i < usage->num_levels; i++) {
         /* If we've reduced the array to zero elements at some level, just
          * set comps_kept to 0 and delete the variable.
          */
         if (usage->levels[i].array_len == 0) {
            usage->comps_kept = 0;
            break;
         }

         assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
         if (usage->levels[i].array_len < glsl_get_length(vec_type))
            shrunk = true;
         vec_type = glsl_get_array_element(vec_type);
      }
      assert(glsl_type_is_vector_or_scalar(vec_type));

      assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
      if (usage->comps_kept != usage->all_comps)
         shrunk = true;

      if (usage->comps_kept == 0) {
         /* This variable is dead, remove it */
         vars_shrunk = true;
         exec_node_remove(&var->node);
         continue;
      }

      if (!shrunk) {
         /* This variable doesn't need to be shrunk.  Remove it from the
          * hash table so later steps will ignore it.
          */
         _mesa_hash_table_remove_key(var_usage_map, var);
         continue;
      }

      /* Build the new var type */
      unsigned new_num_comps = _mesa_bitcount(usage->comps_kept);
      const struct glsl_type *new_type =
         glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
      for (int i = usage->num_levels - 1; i >= 0; i--) {
         assert(usage->levels[i].array_len > 0);
         /* If the original type was a matrix type, we'd like to keep that so
          * we don't convert matrices into arrays.
          */
         if (i == usage->num_levels - 1 &&
             glsl_type_is_matrix(glsl_without_array(var->type)) &&
             new_num_comps > 1 && usage->levels[i].array_len > 1) {
            new_type = glsl_matrix_type(glsl_get_base_type(new_type),
                                        new_num_comps,
                                        usage->levels[i].array_len);
         } else {
            new_type = glsl_array_type(new_type, usage->levels[i].array_len);
         }
      }
      var->type = new_type;

      vars_shrunk = true;
   }

   return vars_shrunk;
}
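/* For illustration (hypothetical usage): for "vec4 data[8]" with
 * comps_read = 0x7, comps_written = 0x3, max_read = 5 and max_written = 2,
 * the logic above keeps comps 0x3 and computes
 *
 *    max_used  = MIN2(5, 2)         = 2
 *    array_len = MIN2(2, 8 - 1) + 1 = 3
 *
 * so the variable shrinks to vec2[3].
 */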
static bool
vec_deref_is_oob(nir_deref_instr *deref,
                 struct vec_var_usage *usage)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   bool oob = false;
   for (unsigned i = 0; i < usage->num_levels; i++) {
      nir_deref_instr *p = path.path[i + 1];
      if (p->deref_type == nir_deref_type_array_wildcard)
         continue;

      nir_const_value *const_index = nir_src_as_const_value(p->arr.index);
      if (const_index && const_index->u32[0] >= usage->levels[i].array_len) {
         oob = true;
         break;
      }
   }

   nir_deref_path_finish(&path);

   return oob;
}

static bool
vec_deref_is_dead_or_oob(nir_deref_instr *deref,
                         struct hash_table *var_usage_map,
                         nir_variable_mode modes)
{
   struct vec_var_usage *usage =
      get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
   if (!usage)
      return false;

   return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
}
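/* A sketch of the load rewrite performed below (hypothetical NIR): if only
 * comps 0x5 (.xz) of a vec4 are kept, the old 4-component load becomes a
 * 2-component load and the original users see a rebuilt vector
 *
 *    %v = vec4 %new.x, undef, %new.y, undef
 *
 * with the kept channels compacted and dead channels replaced by undef.
 */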
static void
shrink_vec_var_access_impl(nir_function_impl *impl,
                           struct hash_table *var_usage_map,
                           nir_variable_mode modes)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (!(deref->mode & modes))
               break;

            /* Clean up any dead derefs we find lying around.  They may refer
             * to variables we've deleted.
             */
            if (nir_deref_instr_remove_if_unused(deref))
               break;

            /* Update the type in the deref to keep the types consistent as
             * you walk down the chain.  We don't need to check if this is one
             * of the derefs we're shrinking because this is a no-op if it
             * isn't.  The worst that could happen is that we accidentally fix
             * an invalid deref.
             */
            if (deref->deref_type == nir_deref_type_var) {
               deref->type = deref->var->type;
            } else if (deref->deref_type == nir_deref_type_array ||
                       deref->deref_type == nir_deref_type_array_wildcard) {
               nir_deref_instr *parent = nir_deref_instr_parent(deref);
               assert(glsl_type_is_array(parent->type) ||
                      glsl_type_is_matrix(parent->type));
               deref->type = glsl_get_array_element(parent->type);
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            /* If we have a copy whose source or destination has been deleted
             * because we determined the variable was dead, then we just
             * delete the copy instruction.  If the source variable was dead
             * then it was writing undefined garbage anyway and if it's the
             * destination variable that's dead then the write isn't needed.
             */
            if (intrin->intrinsic == nir_intrinsic_copy_deref) {
               nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
               nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
               if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
                   vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
                  nir_instr_remove(&intrin->instr);
                  nir_deref_instr_remove_if_unused(dst);
                  nir_deref_instr_remove_if_unused(src);
               }
               continue;
            }

            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!(deref->mode & modes))
               continue;

            struct vec_var_usage *usage =
               get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
            if (!usage)
               continue;

            if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
               if (intrin->intrinsic == nir_intrinsic_load_deref) {
                  nir_ssa_def *u =
                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
                                       intrin->dest.ssa.bit_size);
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                           nir_src_for_ssa(u));
               }
               nir_instr_remove(&intrin->instr);
               nir_deref_instr_remove_if_unused(deref);
               continue;
            }

            if (intrin->intrinsic == nir_intrinsic_load_deref) {
               b.cursor = nir_after_instr(&intrin->instr);

               nir_ssa_def *undef =
                  nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
               nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
               unsigned c = 0;
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (usage->comps_kept & (1u << i))
                     vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
                  else
                     vec_srcs[i] = undef;
               }
               nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);

               nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
                                              nir_src_for_ssa(vec),
                                              vec->parent_instr);

               /* The SSA def is now only used by the swizzle.  It's safe to
                * shrink the number of components.
                */
               assert(list_length(&intrin->dest.ssa.uses) == c);
               intrin->num_components = c;
               intrin->dest.ssa.num_components = c;
            } else {
               nir_component_mask_t write_mask =
                  nir_intrinsic_write_mask(intrin);

               unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
               nir_component_mask_t new_write_mask = 0;
               unsigned c = 0;
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (usage->comps_kept & (1u << i)) {
                     swizzle[c] = i;
                     if (write_mask & (1u << i))
                        new_write_mask |= 1u << c;
                     c++;
                  }
               }

               b.cursor = nir_before_instr(&intrin->instr);

               nir_ssa_def *swizzled =
                  nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);

               /* Rewrite to use the compacted source */
               nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
                                     nir_src_for_ssa(swizzled));
               nir_intrinsic_set_write_mask(intrin, new_write_mask);
               intrin->num_components = c;
            }
            break;
         }

         default:
            break;
         }
      }
   }
}
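/* For illustration (hypothetical store): with comps_kept = 0x5 (.xz), a
 * store of a vec4 with write_mask 0xc (.zw) is rewritten above to a
 * two-component store of src.xz with new_write_mask 0x2: old component 2
 * maps to new component 1, and component 3 is dropped entirely.
 */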
static bool
function_impl_has_vars_with_modes(nir_function_impl *impl,
                                  nir_variable_mode modes)
{
   nir_shader *shader = impl->function->shader;

   if ((modes & nir_var_global) && !exec_list_is_empty(&shader->globals))
      return true;

   if ((modes & nir_var_local) && !exec_list_is_empty(&impl->locals))
      return true;

   return false;
}

/** Attempt to shrink arrays of vectors
 *
 * This pass looks at variables which contain a vector or an array (possibly
 * multiple dimensions) of vectors and attempts to lower to a smaller vector
 * or array.  If the pass can prove that a component of a vector (or array of
 * vectors) is never really used, then that component will be removed.
 * Similarly, the pass attempts to shorten arrays based on what elements it
 * can prove are never read or never contain valid data.
 */
bool
nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
{
   assert((modes & (nir_var_global | nir_var_local)) == modes);

   void *mem_ctx = ralloc_context(NULL);

   struct hash_table *var_usage_map =
      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
                              _mesa_key_pointer_equal);

   bool has_vars_to_shrink = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      /* Don't even bother crawling the IR if we don't have any variables.
       * Given that this pass deletes any unused variables, it's likely that
       * we will be in this scenario eventually.
       */
      if (function_impl_has_vars_with_modes(function->impl, modes)) {
         has_vars_to_shrink = true;
         find_used_components_impl(function->impl, var_usage_map,
                                   modes, mem_ctx);
      }
   }
   if (!has_vars_to_shrink) {
      ralloc_free(mem_ctx);
      return false;
   }

   bool globals_shrunk = false;
   if (modes & nir_var_global)
      globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool locals_shrunk = false;
      if (modes & nir_var_local) {
         locals_shrunk = shrink_vec_var_list(&function->impl->locals,
                                             var_usage_map);
      }

      if (globals_shrunk || locals_shrunk) {
         shrink_vec_var_access_impl(function->impl, var_usage_map, modes);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}
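/* A sketch of how a driver might order these passes (hypothetical; NIR_PASS
 * and the exact pipeline are up to the caller):
 *
 *    NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_local);
 *    NIR_PASS(progress, nir, nir_split_array_vars, nir_var_local);
 *    NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_local);
 *
 * Struct splitting runs first so the array passes only ever see arrays of
 * vectors.
 */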