diff options
author | Matt Turner <[email protected]> | 2016-08-24 19:25:58 -0700 |
---|---|---|
committer | Matt Turner <[email protected]> | 2016-08-25 13:45:39 -0700 |
commit | e53130cc27b966a09d48be53cb51e09ea7ad0649 (patch) | |
tree | 8e916256d605c40e31a535c069442fcf1272f5f1 /src | |
parent | a491b9e945423a81f785220b85a9b395060952aa (diff) |
nir: Walk blocks in source code order in lower_vars_to_ssa.
Prior to this commit rename_variables_block() is recursively called,
performing a depth-first traversal of the control flow graph. The
function uses a non-trivial amount of stack space for local variables,
which puts us in danger of smashing the stack, given a sufficiently deep
dominance tree.
XCOM: Enemy Within contains a shader with such a dominance tree (1574
nir_blocks in total, depth of at least 143).
Jason tells me that he believes that any walk over the nir_blocks that
respects dominance is sufficient (a DFS might have been necessary prior
to the introduction of nir_phi_builder).
In fact, the introduction of nir_phi_builder made the problem worse:
rename_variables_block(), walks to the bottom of the dominance tree
before calling nir_phi_builder_value_get_block_def() which walks back to
the top of the dominance tree...
In any case, this patch ensures we avoid that problem as well.
Cc: [email protected]
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97225
Reviewed-by: Connor Abbott <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler/nir/nir_lower_vars_to_ssa.c | 209 | ||||
-rw-r--r-- | src/compiler/nir/nir_phi_builder.h | 3 |
2 files changed, 106 insertions, 106 deletions
diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c index 317647bf9e4..25dc70c4618 100644 --- a/src/compiler/nir/nir_lower_vars_to_ssa.c +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c @@ -471,7 +471,7 @@ lower_copies_to_load_store(struct deref_node *node, return true; } -/* Performs variable renaming by doing a DFS of the dominance tree +/* Performs variable renaming * * This algorithm is very similar to the one outlined in "Efficiently * Computing Static Single Assignment Form and the Control Dependence @@ -479,133 +479,132 @@ lower_copies_to_load_store(struct deref_node *node, * SSA def on the stack per block. */ static bool -rename_variables_block(nir_block *block, struct lower_variables_state *state) +rename_variables(struct lower_variables_state *state) { nir_builder b; nir_builder_init(&b, state->impl); - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - switch (intrin->intrinsic) { - case nir_intrinsic_load_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); - - if (node == NULL) { - /* If we hit this path then we are referencing an invalid - * value. Most likely, we unrolled something and are - * reading past the end of some array. In any case, this - * should result in an undefined value. - */ - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->shader, - intrin->num_components, - intrin->dest.ssa.bit_size); - - nir_instr_insert_before(&intrin->instr, &undef->instr); - nir_instr_remove(&intrin->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&undef->def)); + nir_foreach_block(block, state->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) continue; - } - if (!node->lower_to_ssa) - continue; + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: { + struct deref_node *node = + get_deref_node(intrin->variables[0], state); + + if (node == NULL) { + /* If we hit this path then we are referencing an invalid + * value. Most likely, we unrolled something and are + * reading past the end of some array. In any case, this + * should result in an undefined value. + */ + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(state->shader, + intrin->num_components, + intrin->dest.ssa.bit_size); + + nir_instr_insert_before(&intrin->instr, &undef->instr); + nir_instr_remove(&intrin->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&undef->def)); + continue; + } - nir_alu_instr *mov = nir_alu_instr_create(state->shader, - nir_op_imov); - mov->src[0].src = nir_src_for_ssa( - nir_phi_builder_value_get_block_def(node->pb_value, block)); - for (unsigned i = intrin->num_components; i < 4; i++) - mov->src[0].swizzle[i] = 0; + if (!node->lower_to_ssa) + continue; - assert(intrin->dest.is_ssa); + nir_alu_instr *mov = nir_alu_instr_create(state->shader, + nir_op_imov); + mov->src[0].src = nir_src_for_ssa( + nir_phi_builder_value_get_block_def(node->pb_value, block)); + for (unsigned i = intrin->num_components; i < 4; i++) + mov->src[0].swizzle[i] = 0; - mov->dest.write_mask = (1 << intrin->num_components) - 1; - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - intrin->num_components, - intrin->dest.ssa.bit_size, NULL); + assert(intrin->dest.is_ssa); - nir_instr_insert_before(&intrin->instr, &mov->instr); - nir_instr_remove(&intrin->instr); + mov->dest.write_mask = (1 << intrin->num_components) - 1; + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, + intrin->num_components, + intrin->dest.ssa.bit_size, NULL); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&mov->dest.dest.ssa)); - break; - } - - case nir_intrinsic_store_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); - - if (node == NULL) { - /* Probably an out-of-bounds array store. That should be a - * no-op. */ + nir_instr_insert_before(&intrin->instr, &mov->instr); nir_instr_remove(&intrin->instr); - continue; - } - if (!node->lower_to_ssa) - continue; - - assert(intrin->num_components == - glsl_get_vector_elements(node->type)); - - assert(intrin->src[0].is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&mov->dest.dest.ssa)); + break; + } - nir_ssa_def *new_def; - b.cursor = nir_before_instr(&intrin->instr); + case nir_intrinsic_store_var: { + struct deref_node *node = + get_deref_node(intrin->variables[0], state); - unsigned wrmask = nir_intrinsic_write_mask(intrin); - if (wrmask == (1 << intrin->num_components) - 1) { - /* Whole variable store - just copy the source. Note that - * intrin->num_components and intrin->src[0].ssa->num_components - * may differ. - */ - unsigned swiz[4]; - for (unsigned i = 0; i < 4; i++) - swiz[i] = i < intrin->num_components ? i : 0; + if (node == NULL) { + /* Probably an out-of-bounds array store. That should be a + * no-op. */ + nir_instr_remove(&intrin->instr); + continue; + } - new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, - intrin->num_components, false); - } else { - nir_ssa_def *old_def = - nir_phi_builder_value_get_block_def(node->pb_value, block); - /* For writemasked store_var intrinsics, we combine the newly - * written values with the existing contents of unwritten - * channels, creating a new SSA value for the whole vector. - */ - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < intrin->num_components; i++) { - if (wrmask & (1 << i)) { - srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); - } else { - srcs[i] = nir_channel(&b, old_def, i); + if (!node->lower_to_ssa) + continue; + + assert(intrin->num_components == + glsl_get_vector_elements(node->type)); + + assert(intrin->src[0].is_ssa); + + nir_ssa_def *new_def; + b.cursor = nir_before_instr(&intrin->instr); + + unsigned wrmask = nir_intrinsic_write_mask(intrin); + if (wrmask == (1 << intrin->num_components) - 1) { + /* Whole variable store - just copy the source. Note that + * intrin->num_components and intrin->src[0].ssa->num_components + * may differ. + */ + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < intrin->num_components ? i : 0; + + new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, + intrin->num_components, false); + } else { + nir_ssa_def *old_def = + nir_phi_builder_value_get_block_def(node->pb_value, block); + /* For writemasked store_var intrinsics, we combine the newly + * written values with the existing contents of unwritten + * channels, creating a new SSA value for the whole vector. + */ + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < intrin->num_components; i++) { + if (wrmask & (1 << i)) { + srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); + } else { + srcs[i] = nir_channel(&b, old_def, i); + } } + new_def = nir_vec(&b, srcs, intrin->num_components); } - new_def = nir_vec(&b, srcs, intrin->num_components); - } - assert(new_def->num_components == intrin->num_components); + assert(new_def->num_components == intrin->num_components); - nir_phi_builder_value_set_block_def(node->pb_value, block, new_def); - nir_instr_remove(&intrin->instr); - break; - } + nir_phi_builder_value_set_block_def(node->pb_value, block, new_def); + nir_instr_remove(&intrin->instr); + break; + } - default: - break; + default: + break; + } } } - for (unsigned i = 0; i < block->num_dom_children; ++i) - rename_variables_block(block->dom_children[i], state); - return true; } @@ -737,7 +736,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) } } - rename_variables_block(nir_start_block(impl), &state); + rename_variables(&state); nir_phi_builder_finish(state.phi_builder); diff --git a/src/compiler/nir/nir_phi_builder.h b/src/compiler/nir/nir_phi_builder.h index edc530268c2..a4dc18a2b22 100644 --- a/src/compiler/nir/nir_phi_builder.h +++ b/src/compiler/nir/nir_phi_builder.h @@ -44,7 +44,8 @@ * var.pb_val = nir_phi_builder_add_value(pb, var.defs) * * // Visit each block. This needs to visit dominators first; - * // nir_for_each_block() will be ok. + * // nir_foreach_block() will be ok. + * * foreach block: * foreach instruction: * foreach use of variable var: |