From d477beab077106d8e76a80298a685f16f4c4e85e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 13 Nov 2014 17:16:31 -0800 Subject: nir: Add a pass to lower local variable accesses to SSA values This pass analizes all of the load/store operations and, when a variable is never aliased (potentially used by an indirect operation), it is lowered directly to an SSA value. This pass translates to SSA directly and does not require any fixup by the original to-SSA pass. Reviewed-by: Connor Abbott --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 2 + src/glsl/nir/nir_lower_variables.c | 1066 ++++++++++++++++++++++++++++++++++++ 3 files changed, 1069 insertions(+) create mode 100644 src/glsl/nir/nir_lower_variables.c (limited to 'src/glsl') diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 84245bcb3c6..1d3b0499aaa 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -24,6 +24,7 @@ NIR_FILES = \ $(GLSL_SRCDIR)/nir/nir_lower_atomics.c \ $(GLSL_SRCDIR)/nir/nir_lower_samplers.cpp \ $(GLSL_SRCDIR)/nir/nir_lower_system_values.c \ + $(GLSL_SRCDIR)/nir/nir_lower_variables.c \ $(GLSL_SRCDIR)/nir/nir_lower_variables_scalar.c \ $(GLSL_SRCDIR)/nir/nir_lower_vec_to_movs.c \ $(GLSL_SRCDIR)/nir/nir_metadata.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 8664fc8d47a..9d207bd4255 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1358,6 +1358,8 @@ void nir_dump_cfg(nir_shader *shader, FILE *fp); void nir_split_var_copies(nir_shader *shader); +void nir_lower_variables(nir_shader *shader); + void nir_lower_variables_scalar(nir_shader *shader, bool lower_globals, bool lower_io, bool add_names, bool native_integers); diff --git a/src/glsl/nir/nir_lower_variables.c b/src/glsl/nir/nir_lower_variables.c new file mode 100644 index 00000000000..ed3e0fd2876 --- /dev/null +++ b/src/glsl/nir/nir_lower_variables.c @@ -0,0 +1,1066 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +struct deref_node { + struct deref_node *parent; + const struct glsl_type *type; + + bool lower_to_ssa; + + struct set *loads; + struct set *stores; + struct set *copies; + + nir_ssa_def **def_stack; + nir_ssa_def **def_stack_tail; + + struct deref_node *wildcard; + struct deref_node *indirect; + struct deref_node *children[0]; +}; + +struct lower_variables_state { + void *mem_ctx; + void *dead_ctx; + nir_function_impl *impl; + + /* A hash table mapping variables to deref_node data */ + struct hash_table *deref_var_nodes; + /* A hash table mapping dereference leaves to deref_node data */ + struct hash_table *deref_leaves; + + /* A hash table mapping phi nodes to deref_state data */ + struct hash_table *phi_table; +}; + +/* The following two functions implement a hash and equality check for + * variable dreferences. When the hash or equality function encounters an + * array, all indirects are treated as equal and are never equal to a + * direct dereference or a wildcard. + */ +static uint32_t +hash_deref(const void *void_deref) +{ + const nir_deref *deref = void_deref; + + uint32_t hash; + if (deref->child) { + hash = hash_deref(deref->child); + } else { + hash = 2166136261ul; + } + + switch (deref->deref_type) { + case nir_deref_type_var: + hash ^= _mesa_hash_pointer(nir_deref_as_var(deref)->var); + break; + case nir_deref_type_array: { + nir_deref_array *array = nir_deref_as_array(deref); + hash += 268435183 * array->deref_array_type; + if (array->deref_array_type == nir_deref_array_type_direct) + hash ^= array->base_offset; /* Some prime */ + break; + } + case nir_deref_type_struct: + hash ^= nir_deref_as_struct(deref)->index; + break; + } + + return hash * 0x01000193; +} + +static bool +derefs_equal(const void *void_a, const void *void_b) +{ + const nir_deref *a = void_a; + const nir_deref *b = void_b; + + if (a->deref_type != b->deref_type) + return false; + + switch (a->deref_type) { + case nir_deref_type_var: + if (nir_deref_as_var(a)->var != nir_deref_as_var(b)->var) + return false; + break; + case nir_deref_type_array: { + nir_deref_array *a_arr = nir_deref_as_array(a); + nir_deref_array *b_arr = nir_deref_as_array(b); + + if (a_arr->deref_array_type != b_arr->deref_array_type) + return false; + + if (a_arr->deref_array_type == nir_deref_array_type_direct && + a_arr->base_offset != b_arr->base_offset) + return false; + break; + } + case nir_deref_type_struct: + if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index) + return false; + break; + default: + unreachable("Invalid dreference type"); + } + + assert((a->child == NULL) == (b->child == NULL)); + if (a->child) + return derefs_equal(a->child, b->child); + else + return true; +} + +static int +type_get_length(const struct glsl_type *type) +{ + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + return glsl_get_length(type); + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(type)) + return glsl_get_matrix_columns(type); + else + return glsl_get_vector_elements(type); + default: + unreachable("Invalid deref base type"); + } +} + +static struct deref_node * +deref_node_create(struct deref_node *parent, + const struct glsl_type *type, void *mem_ctx) +{ + size_t size = sizeof(struct deref_node) + + type_get_length(type) * sizeof(struct deref_node *); + + struct deref_node *node = rzalloc_size(mem_ctx, size); + node->type = type; + node->parent = parent; + + return node; +} + +static struct deref_node * +get_deref_node(nir_deref_var *deref, bool add_to_leaves, + struct lower_variables_state *state) +{ + bool is_leaf = true; + struct deref_node *parent = NULL; + nir_deref *tail = &deref->deref; + while (tail) { + struct deref_node *node; + + switch (tail->deref_type) { + case nir_deref_type_var: { + assert(tail == &deref->deref); + assert(parent == NULL); + + struct hash_entry *entry = + _mesa_hash_table_search(state->deref_var_nodes, deref->var); + if (entry) { + node = entry->data; + } else { + node = deref_node_create(NULL, tail->type, state->dead_ctx); + _mesa_hash_table_insert(state->deref_var_nodes, deref->var, node); + } + break; + } + + case nir_deref_type_struct: { + assert(parent != NULL); + + nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + assert(deref_struct->index < type_get_length(parent->type)); + if (parent->children[deref_struct->index]) { + node = parent->children[deref_struct->index]; + } else { + node = deref_node_create(parent, tail->type, state->dead_ctx); + parent->children[deref_struct->index] = node; + } + break; + } + + case nir_deref_type_array: { + assert(parent != NULL); + + nir_deref_array *arr = nir_deref_as_array(tail); + switch (arr->deref_array_type) { + case nir_deref_array_type_direct: + if (arr->base_offset >= type_get_length(parent->type)) { + /* This is possible if a loop unrolls and generates an + * out-of-bounds offset. We need to handle this at least + * somewhat gracefully. + */ + return NULL; + } else if (parent->children[arr->base_offset]) { + node = parent->children[arr->base_offset]; + } else { + node = deref_node_create(parent, tail->type, state->dead_ctx); + parent->children[arr->base_offset] = node; + } + break; + case nir_deref_array_type_indirect: + if (parent->indirect) { + node = parent->indirect; + } else { + node = deref_node_create(parent, tail->type, state->dead_ctx); + parent->indirect = node; + } + is_leaf = false; + break; + case nir_deref_array_type_wildcard: + if (parent->wildcard) { + node = parent->wildcard; + } else { + node = deref_node_create(parent, tail->type, state->dead_ctx); + parent->wildcard = node; + } + is_leaf = false; + break; + default: + unreachable("Invalid array deref type"); + } + break; + } + default: + unreachable("Invalid deref type"); + } + + parent = node; + tail = tail->child; + } + + assert(parent); + + if (is_leaf && add_to_leaves) + _mesa_hash_table_insert(state->deref_leaves, deref, parent); + + return parent; +} + +static void +register_load_instr(nir_intrinsic_instr *load_instr, bool create_node, + struct lower_variables_state *state) +{ + struct deref_node *node = get_deref_node(load_instr->variables[0], + create_node, state); + if (node == NULL) + return; + + if (node->loads == NULL) + node->loads = _mesa_set_create(state->dead_ctx, + _mesa_key_pointer_equal); + + _mesa_set_add(node->loads, _mesa_hash_pointer(load_instr), load_instr); +} + +static void +register_store_instr(nir_intrinsic_instr *store_instr, bool create_node, + struct lower_variables_state *state) +{ + struct deref_node *node = get_deref_node(store_instr->variables[0], + create_node, state); + if (node == NULL) + return; + + if (node->stores == NULL) + node->stores = _mesa_set_create(state->dead_ctx, + _mesa_key_pointer_equal); + + _mesa_set_add(node->stores, _mesa_hash_pointer(store_instr), store_instr); +} + +static void +register_copy_instr(nir_intrinsic_instr *copy_instr, bool create_node, + struct lower_variables_state *state) +{ + for (unsigned idx = 0; idx < 2; idx++) { + struct deref_node *node = get_deref_node(copy_instr->variables[idx], + create_node, state); + if (node == NULL) + continue; + + if (node->copies == NULL) + node->copies = _mesa_set_create(state->dead_ctx, + _mesa_key_pointer_equal); + + _mesa_set_add(node->copies, _mesa_hash_pointer(copy_instr), copy_instr); + } +} + +static bool +foreach_deref_node_worker(struct deref_node *node, nir_deref *deref, + bool (* cb)(struct deref_node *node, + struct lower_variables_state *state), + struct lower_variables_state *state) +{ + if (deref->child == NULL) { + return cb(node, state); + } else { + switch (deref->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(deref->child); + assert(arr->deref_array_type == nir_deref_array_type_direct); + if (node->children[arr->base_offset] && + !foreach_deref_node_worker(node->children[arr->base_offset], + deref->child, cb, state)) + return false; + + if (node->wildcard && + !foreach_deref_node_worker(node->wildcard, + deref->child, cb, state)) + return false; + + return true; + } + + case nir_deref_type_struct: { + nir_deref_struct *str = nir_deref_as_struct(deref->child); + return foreach_deref_node_worker(node->children[str->index], + deref->child, cb, state); + } + + default: + unreachable("Invalid deref child type"); + } + } +} + +static bool +foreach_deref_node_match(nir_deref_var *deref, + bool (* cb)(struct deref_node *node, + struct lower_variables_state *state), + struct lower_variables_state *state) +{ + nir_deref_var var_deref = *deref; + var_deref.deref.child = NULL; + struct deref_node *node = get_deref_node(&var_deref, false, state); + + if (node == NULL) + return false; + + return foreach_deref_node_worker(node, &deref->deref, cb, state); +} + +/* This question can only be asked about leaves. Searching down the tree + * is much harder than searching up. + */ +static bool +deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref, + struct lower_variables_state *state) +{ + if (deref->child == NULL) { + return false; + } else { + switch (deref->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(deref->child); + if (arr->deref_array_type == nir_deref_array_type_indirect) + return true; + + assert(arr->deref_array_type == nir_deref_array_type_direct); + + if (node->children[arr->base_offset] && + deref_may_be_aliased_node(node->children[arr->base_offset], + deref->child, state)) + return true; + + if (node->wildcard && + deref_may_be_aliased_node(node->wildcard, deref->child, state)) + return true; + + return false; + } + + case nir_deref_type_struct: { + nir_deref_struct *str = nir_deref_as_struct(deref->child); + if (node->children[str->index]) { + return deref_may_be_aliased_node(node->children[str->index], + deref->child, state); + } else { + return false; + } + } + + default: + unreachable("Invalid nir_deref child type"); + } + } +} + +static bool +deref_may_be_aliased(nir_deref_var *deref, + struct lower_variables_state *state) +{ + nir_deref_var var_deref = *deref; + var_deref.deref.child = NULL; + struct deref_node *node = get_deref_node(&var_deref, false, state); + + /* An invalid dereference can't be aliased. */ + if (node == NULL) + return false; + + return deref_may_be_aliased_node(node, &deref->deref, state); +} + +static bool +fill_deref_tables_block(nir_block *block, void *void_state) +{ + struct lower_variables_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var_vec1: + case nir_intrinsic_load_var_vec2: + case nir_intrinsic_load_var_vec3: + case nir_intrinsic_load_var_vec4: + register_load_instr(intrin, true, state); + break; + + case nir_intrinsic_store_var_vec1: + case nir_intrinsic_store_var_vec2: + case nir_intrinsic_store_var_vec3: + case nir_intrinsic_store_var_vec4: + register_store_instr(intrin, true, state); + break; + + case nir_intrinsic_copy_var: + register_copy_instr(intrin, true, state); + break; + + default: + continue; + } + } + + return true; +} + +static nir_deref * +deref_next_wildcard_parent(nir_deref *deref) +{ + for (nir_deref *tail = deref; tail->child; tail = tail->child) { + if (tail->child->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *arr = nir_deref_as_array(tail->child); + + if (arr->deref_array_type == nir_deref_array_type_wildcard) + return tail; + } + + return NULL; +} + +static nir_deref * +get_deref_tail(nir_deref *deref) +{ + while (deref->child) + deref = deref->child; + + return deref; +} + +static void +emit_copy_load_store(nir_intrinsic_instr *copy_instr, + nir_deref_var *dest_head, nir_deref_var *src_head, + nir_deref *dest_tail, nir_deref *src_tail, + struct lower_variables_state *state) +{ + nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail); + nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail); + + if (src_arr_parent || dest_arr_parent) { + assert(dest_arr_parent && dest_arr_parent); + + nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child); + nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child); + + unsigned length = type_get_length(src_arr_parent->type); + assert(length == type_get_length(dest_arr_parent->type)); + assert(length > 0); + + src_arr->deref_array_type = nir_deref_array_type_direct; + dest_arr->deref_array_type = nir_deref_array_type_direct; + for (unsigned i = 0; i < length; i++) { + src_arr->base_offset = i; + dest_arr->base_offset = i; + emit_copy_load_store(copy_instr, dest_head, src_head, + &dest_arr->deref, &src_arr->deref, state); + } + src_arr->deref_array_type = nir_deref_array_type_wildcard; + dest_arr->deref_array_type = nir_deref_array_type_wildcard; + } else { + /* Base case. Actually do the copy */ + src_tail = get_deref_tail(src_tail); + dest_tail = get_deref_tail(dest_tail); + + assert(src_tail->type == dest_tail->type); + + unsigned num_components = glsl_get_vector_elements(src_tail->type); + + nir_deref *src_deref = nir_copy_deref(state->mem_ctx, &src_head->deref); + nir_deref *dest_deref = nir_copy_deref(state->mem_ctx, &dest_head->deref); + + nir_intrinsic_op load_op; + switch (num_components) { + case 1: load_op = nir_intrinsic_load_var_vec1; break; + case 2: load_op = nir_intrinsic_load_var_vec2; break; + case 3: load_op = nir_intrinsic_load_var_vec3; break; + case 4: load_op = nir_intrinsic_load_var_vec4; break; + default: unreachable("Invalid number of components"); break; + } + + nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, + load_op); + load->variables[0] = nir_deref_as_var(src_deref); + load->dest.is_ssa = true; + nir_ssa_def_init(&load->instr, &load->dest.ssa, num_components, NULL); + + nir_instr_insert_before(©_instr->instr, &load->instr); + register_load_instr(load, false, state); + + nir_intrinsic_op store_op; + switch (num_components) { + case 1: store_op = nir_intrinsic_store_var_vec1; break; + case 2: store_op = nir_intrinsic_store_var_vec2; break; + case 3: store_op = nir_intrinsic_store_var_vec3; break; + case 4: store_op = nir_intrinsic_store_var_vec4; break; + default: unreachable("Invalid number of components"); break; + } + + nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, + store_op); + store->variables[0] = nir_deref_as_var(dest_deref); + store->src[0].is_ssa = true; + store->src[0].ssa = &load->dest.ssa; + + if (copy_instr->has_predicate) { + store->has_predicate = true; + store->predicate = nir_src_copy(copy_instr->predicate, state->mem_ctx); + } + + nir_instr_insert_before(©_instr->instr, &store->instr); + register_store_instr(store, false, state); + } +} + +static bool +lower_copies_to_load_store(struct deref_node *node, + struct lower_variables_state *state) +{ + if (!node->copies) + return true; + + struct set_entry *copy_entry; + set_foreach(node->copies, copy_entry) { + nir_intrinsic_instr *copy = (void *)copy_entry->key; + + emit_copy_load_store(copy, copy->variables[0], copy->variables[1], + ©->variables[0]->deref, + ©->variables[1]->deref, + state); + + for (unsigned i = 0; i < 2; ++i) { + struct deref_node *arg_node = get_deref_node(copy->variables[i], + false, state); + if (arg_node == NULL) + continue; + + struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, + copy_entry->hash, + copy); + assert(arg_entry); + _mesa_set_remove(node->copies, arg_entry); + } + + nir_instr_remove(©->instr); + } + + return true; +} + +static nir_load_const_instr * +get_const_initializer_load(const nir_deref_var *deref, + struct lower_variables_state *state) +{ + nir_constant *constant = deref->var->constant_initializer; + const nir_deref *tail = &deref->deref; + unsigned matrix_offset = 0; + while (tail->child) { + switch (tail->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(tail->child); + assert(arr->deref_array_type == nir_deref_array_type_direct); + if (glsl_type_is_matrix(tail->type)) { + assert(arr->deref.child == NULL); + matrix_offset = arr->base_offset; + } else { + constant = constant->elements[arr->base_offset]; + } + break; + } + + case nir_deref_type_struct: { + constant = constant->elements[nir_deref_as_struct(tail->child)->index]; + break; + } + + default: + unreachable("Invalid deref child type"); + } + + tail = tail->child; + } + + nir_load_const_instr *load = nir_load_const_instr_create(state->mem_ctx); + load->array_elems = 0; + load->num_components = glsl_get_vector_elements(tail->type); + + matrix_offset *= load->num_components; + for (unsigned i = 0; i < load->num_components; i++) { + switch (glsl_get_base_type(tail->type)) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + load->value.u[i] = constant->value.u[matrix_offset + i]; + break; + case GLSL_TYPE_BOOL: + load->value.u[i] = constant->value.u[matrix_offset + i] ? + NIR_TRUE : NIR_FALSE; + break; + default: + unreachable("Invalid immediate type"); + } + } + + return load; +} + +static void +def_stack_push(struct deref_node *node, nir_ssa_def *def, + struct lower_variables_state *state) +{ + if (node->def_stack == NULL) { + node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *, + state->impl->num_blocks); + node->def_stack_tail = node->def_stack - 1; + } + + if (node->def_stack_tail >= node->def_stack) { + nir_ssa_def *top_def = *node->def_stack_tail; + + if (def->parent_instr->block == top_def->parent_instr->block) { + /* They're in the same block, just replace the top */ + *node->def_stack_tail = def; + return; + } + } + + *(++node->def_stack_tail) = def; +} + +static nir_ssa_def * +get_ssa_def_for_block(struct deref_node *node, nir_block *block, + struct lower_variables_state *state) +{ + if (node->def_stack) { + while (node->def_stack_tail >= node->def_stack) { + nir_ssa_def *def = *node->def_stack_tail; + + for (nir_block *dom = block; dom != NULL; dom = dom->imm_dom) { + if (def->parent_instr->block == dom) + return def; + } + + node->def_stack_tail--; + } + } + + /* If we got here then we don't have a definition that dominates the + * given block. This means that we need to add an undef and use that. + */ + nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(state->mem_ctx); + nir_ssa_def_init(&undef->instr, &undef->def, + glsl_get_vector_elements(node->type), NULL); + nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr); + def_stack_push(node, &undef->def, state); + return &undef->def; +} + +static void +add_phi_sources(nir_block *block, nir_block *pred, + struct lower_variables_state *state) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + + struct hash_entry *entry = + _mesa_hash_table_search(state->phi_table, phi); + if (!entry) + continue; + + struct deref_node *node = entry->data; + + nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src); + src->pred = pred; + src->src.is_ssa = true; + src->src.ssa = get_ssa_def_for_block(node, pred, state); + + _mesa_set_add(src->src.ssa->uses, _mesa_hash_pointer(instr), instr); + + exec_list_push_tail(&phi->srcs, &src->node); + } +} + +static bool +lower_deref_to_ssa_block(nir_block *block, void *void_state) +{ + struct lower_variables_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_phi) { + nir_phi_instr *phi = nir_instr_as_phi(instr); + + struct hash_entry *entry = + _mesa_hash_table_search(state->phi_table, phi); + + /* This can happen if we already have phi nodes in the program + * that were not created in this pass. + */ + if (!entry) + continue; + + struct deref_node *node = entry->data; + + def_stack_push(node, &phi->dest.ssa, state); + } else if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var_vec1: + case nir_intrinsic_load_var_vec2: + case nir_intrinsic_load_var_vec3: + case nir_intrinsic_load_var_vec4: { + struct deref_node *node = get_deref_node(intrin->variables[0], + false, state); + unsigned num_chans = + nir_intrinsic_infos[intrin->intrinsic].dest_components; + + if (node == NULL) { + /* If we hit this path then we are referencing an invalid + * value. Most likely, we unrolled something and are + * reading past the end of some array. In any case, this + * should result in an undefined value. + */ + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(state->mem_ctx); + nir_ssa_def_init(&undef->instr, &undef->def, num_chans, NULL); + + nir_instr_insert_before(&intrin->instr, &undef->instr); + nir_instr_remove(&intrin->instr); + + nir_src new_src = { + .is_ssa = true, + .ssa = &undef->def, + }; + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_src, + state->mem_ctx); + continue; + } + + if (!node->lower_to_ssa) + continue; + + nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, + nir_op_imov); + mov->src[0].src.is_ssa = true; + mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state); + for (unsigned i = num_chans; i < 4; i++) + mov->src[0].swizzle[i] = 0; + + assert(intrin->dest.is_ssa); + + mov->dest.write_mask = (1 << num_chans) - 1; + mov->dest.dest.is_ssa = true; + nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL); + + nir_instr_insert_before(&intrin->instr, &mov->instr); + nir_instr_remove(&intrin->instr); + + nir_src new_src = { + .is_ssa = true, + .ssa = &mov->dest.dest.ssa, + }; + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_src, + state->mem_ctx); + break; + } + + case nir_intrinsic_store_var_vec1: + case nir_intrinsic_store_var_vec2: + case nir_intrinsic_store_var_vec3: + case nir_intrinsic_store_var_vec4: { + struct deref_node *node = get_deref_node(intrin->variables[0], + false, state); + + if (node == NULL) { + /* Probably an out-of-bounds array store. That should be a + * no-op. */ + nir_instr_remove(&intrin->instr); + continue; + } + + if (!node->lower_to_ssa) + continue; + + unsigned num_chans = glsl_get_vector_elements(node->type); + + assert(intrin->src[0].is_ssa); + + nir_alu_instr *mov; + if (intrin->has_predicate) { + mov = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel); + mov->src[0].src = nir_src_copy(intrin->predicate, + state->mem_ctx); + memset(mov->src[0].swizzle, 0, sizeof mov->src[0].swizzle); + + mov->src[1].src.is_ssa = true; + mov->src[1].src.ssa = intrin->src[0].ssa; + for (unsigned i = num_chans; i < 4; i++) + mov->src[1].swizzle[i] = 0; + + mov->src[2].src.is_ssa = true; + mov->src[2].src.ssa = get_ssa_def_for_block(node, block, state); + for (unsigned i = num_chans; i < 4; i++) + mov->src[2].swizzle[i] = 0; + + } else { + mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov); + + mov->src[0].src.is_ssa = true; + mov->src[0].src.ssa = intrin->src[0].ssa; + for (unsigned i = num_chans; i < 4; i++) + mov->src[0].swizzle[i] = 0; + } + + mov->dest.write_mask = (1 << num_chans) - 1; + mov->dest.dest.is_ssa = true; + nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL); + + nir_instr_insert_before(&intrin->instr, &mov->instr); + nir_instr_remove(&intrin->instr); + + def_stack_push(node, &mov->dest.dest.ssa, state); + break; + } + + default: + break; + } + } + } + + if (block->successors[0]) + add_phi_sources(block->successors[0], block, state); + if (block->successors[1]) + add_phi_sources(block->successors[1], block, state); + + return true; +} + +static void +insert_phi_nodes(struct lower_variables_state *state) +{ + unsigned work[state->impl->num_blocks]; + unsigned has_already[state->impl->num_blocks]; + nir_block *W[state->impl->num_blocks]; + + memset(work, 0, sizeof work); + memset(has_already, 0, sizeof has_already); + + unsigned w_start, w_end; + unsigned iter_count = 0; + + struct hash_entry *deref_entry; + hash_table_foreach(state->deref_leaves, deref_entry) { + struct deref_node *node = deref_entry->data; + + if (node->stores == NULL) + continue; + + if (!node->lower_to_ssa) + continue; + + w_start = w_end = 0; + iter_count++; + + struct set_entry *store_entry; + set_foreach(node->stores, store_entry) { + nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key; + if (work[store->instr.block->index] < iter_count) + W[w_end++] = store->instr.block; + work[store->instr.block->index] = iter_count; + } + + while (w_start != w_end) { + nir_block *cur = W[w_start++]; + struct set_entry *dom_entry; + set_foreach(cur->dom_frontier, dom_entry) { + nir_block *next = (nir_block *) dom_entry->key; + + /* + * If there's more than one return statement, then the end block + * can be a join point for some definitions. However, there are + * no instructions in the end block, so nothing would use those + * phi nodes. Of course, we couldn't place those phi nodes + * anyways due to the restriction of having no instructions in the + * end block... + */ + if (next == state->impl->end_block) + continue; + + if (has_already[next->index] < iter_count) { + nir_phi_instr *phi = nir_phi_instr_create(state->mem_ctx); + phi->dest.is_ssa = true; + nir_ssa_def_init(&phi->instr, &phi->dest.ssa, + glsl_get_vector_elements(node->type), NULL); + nir_instr_insert_before_block(next, &phi->instr); + + _mesa_hash_table_insert(state->phi_table, phi, node); + + has_already[next->index] = iter_count; + if (work[next->index] < iter_count) { + work[next->index] = iter_count; + W[w_end++] = next; + } + } + } + } + } +} + +static bool +nir_lower_variables_impl(nir_function_impl *impl) +{ + struct lower_variables_state state; + + state.mem_ctx = ralloc_parent(impl); + state.dead_ctx = ralloc_context(state.mem_ctx); + state.impl = impl; + + state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + state.deref_leaves = _mesa_hash_table_create(state.dead_ctx, + hash_deref, derefs_equal); + state.phi_table = _mesa_hash_table_create(state.dead_ctx, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + nir_foreach_block(impl, fill_deref_tables_block, &state); + + struct set *outputs = _mesa_set_create(state.dead_ctx, + _mesa_key_pointer_equal); + + bool progress = false; + + nir_metadata_require(impl, nir_metadata_block_index); + + struct hash_entry *entry; + hash_table_foreach(state.deref_leaves, entry) { + nir_deref_var *deref = (void *)entry->key; + struct deref_node *node = entry->data; + + if (deref->var->data.mode != nir_var_local) { + _mesa_hash_table_remove(state.deref_leaves, entry); + continue; + } + + if (deref_may_be_aliased(deref, &state)) { + _mesa_hash_table_remove(state.deref_leaves, entry); + continue; + } + + node->lower_to_ssa = true; + progress = true; + + if (deref->var->constant_initializer) { + nir_load_const_instr *load = get_const_initializer_load(deref, &state); + load->dest.is_ssa = true; + nir_ssa_def_init(&load->instr, &load->dest.ssa, + glsl_get_vector_elements(node->type), NULL); + nir_instr_insert_before_cf_list(&impl->body, &load->instr); + def_stack_push(node, &load->dest.ssa, &state); + } + + if (deref->var->data.mode == nir_var_shader_out) + _mesa_set_add(outputs, _mesa_hash_pointer(node), node); + + foreach_deref_node_match(deref, lower_copies_to_load_store, &state); + } + + if (!progress) + return false; + + nir_metadata_require(impl, nir_metadata_dominance); + + insert_phi_nodes(&state); + nir_foreach_block(impl, lower_deref_to_ssa_block, &state); + + nir_metadata_dirty(impl, nir_metadata_block_index | + nir_metadata_dominance); + + ralloc_free(state.dead_ctx); + + return progress; +} + +void +nir_lower_variables(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_lower_variables_impl(overload->impl); + } +} -- cgit v1.2.3