diff options
author | Jason Ekstrand <jason.ekstrand@intel.com> | 2017-03-02 17:10:24 -0800 |
---|---|---|
committer | Jason Ekstrand <jason.ekstrand@intel.com> | 2017-03-14 07:36:20 -0700 |
commit | 3c312be7b3e95ec7540e98abed1b6f3cc8d31b2a (patch) | |
tree | 77461dca97a8af61c35a4c0b9200ad6fbbf9d977 /src | |
parent | 60d1aac28a1f44ac166e72262e378e063155d6fd (diff) |
nir/copy_prop: Respect the source's number of components
In the near future we are going to require that the num_components in a
src dereference match the num_components of the SSA value being
dereferenced. To do that, we need copy_prop to not remove our MOVs from
a larger SSA value into an instruction that uses fewer channels.
Because we suddenly have to know how many components each source has,
this makes the pass a bit more complicated. Fortunately, copy
propagation is the only pass that cares about the number of components
are read by any given source so it's fairly contained.
Shader-db results on Sky Lake:
total instructions in shared programs: 13318947 -> 13320265 (0.01%)
instructions in affected programs: 260633 -> 261951 (0.51%)
helped: 324
HURT: 1027
Looking through the hurt programs, about a dozen are hurt by 3
instructions and the rest are all hurt by 2 instructions. From a
spot-check of the shaders, the story is always the same: They get a
vec4 from somewhere (frequently an input) and use the first two or three
components as a texture coordinate. Because of the vector component
mismatch, we have a mov or, more likely, a vecN sitting between the
texture instruction and the input. This means that the back-end inserts
a bunch of MOVs and split_virtual_grfs() goes to town. Because the
texture coordinate is also used by some other calculation, register
coalesce can't combine them back together and we end up with an extra 2
MOV instructions in our shader.
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler/nir/nir_opt_copy_propagate.c | 129 |
1 files changed, 96 insertions, 33 deletions
diff --git a/src/compiler/nir/nir_opt_copy_propagate.c b/src/compiler/nir/nir_opt_copy_propagate.c index c26e07fda71..c4001fa73f5 100644 --- a/src/compiler/nir/nir_opt_copy_propagate.c +++ b/src/compiler/nir/nir_opt_copy_propagate.c @@ -99,11 +99,12 @@ is_swizzleless_move(nir_alu_instr *instr) } static bool -copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) +copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if, + unsigned num_components) { if (!src->is_ssa) { if (src->reg.indirect) - return copy_prop_src(src->reg.indirect, parent_instr, parent_if); + return copy_prop_src(src->reg.indirect, parent_instr, parent_if, 1); return false; } @@ -115,17 +116,8 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) if (!is_swizzleless_move(alu_instr)) return false; - /* Don't let copy propagation land us with a phi that has more - * components in its source than it has in its destination. That badly - * messes up out-of-ssa. - */ - if (parent_instr && parent_instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(parent_instr); - assert(phi->dest.is_ssa); - if (phi->dest.ssa.num_components != - alu_instr->src[0].src.ssa->num_components) - return false; - } + if (alu_instr->src[0].src.ssa->num_components != num_components) + return false; if (parent_instr) { nir_instr_rewrite_src(parent_instr, src, @@ -146,7 +138,7 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) if (!src->src.is_ssa) { if (src->src.reg.indirect) return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr, - NULL); + NULL, 1); return false; } @@ -193,51 +185,122 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) return true; } -typedef struct { - nir_instr *parent_instr; - bool progress; -} copy_prop_state; +static bool +copy_prop_dest(nir_dest *dest, nir_instr *instr) +{ + if (!dest->is_ssa && dest->reg.indirect) + return copy_prop_src(dest->reg.indirect, instr, NULL, 1); + + return false; +} static bool -copy_prop_src_cb(nir_src *src, void *_state) +copy_prop_deref_var(nir_instr *instr, nir_deref_var *deref_var) { - copy_prop_state *state = (copy_prop_state *) _state; - while (copy_prop_src(src, state->parent_instr, NULL)) - state->progress = true; + if (!deref_var) + return false; - return true; + bool progress = false; + for (nir_deref *deref = deref_var->deref.child; + deref; deref = deref->child) { + if (deref->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *arr = nir_deref_as_array(deref); + if (arr->deref_array_type != nir_deref_array_type_indirect) + continue; + + while (copy_prop_src(&arr->indirect, instr, NULL, 1)) + progress = true; + } + return progress; } static bool copy_prop_instr(nir_instr *instr) { - if (instr->type == nir_instr_type_alu) { + bool progress = false; + switch (instr->type) { + case nir_instr_type_alu: { nir_alu_instr *alu_instr = nir_instr_as_alu(instr); - bool progress = false; for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) while (copy_prop_alu_src(alu_instr, i)) progress = true; - if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect) - while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL)) + while (copy_prop_dest(&alu_instr->dest.dest, instr)) + progress = true; + + return progress; + } + + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + for (unsigned i = 0; i < tex->num_srcs; i++) { + unsigned num_components = nir_tex_instr_src_size(tex, i); + while (copy_prop_src(&tex->src[i].src, instr, NULL, num_components)) progress = true; + } + + if (copy_prop_deref_var(instr, tex->texture)) + progress = true; + if (copy_prop_deref_var(instr, tex->sampler)) + progress = true; + + while (copy_prop_dest(&tex->dest, instr)) + progress = true; + + return progress; + } + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + for (unsigned i = 0; + i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) { + unsigned num_components = + nir_intrinsic_infos[intrin->intrinsic].src_components[i]; + if (!num_components) + num_components = intrin->num_components; + + while (copy_prop_src(&intrin->src[i], instr, NULL, num_components)) + progress = true; + } + + for (unsigned i = 0; + i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) { + if (copy_prop_deref_var(instr, intrin->variables[i])) + progress = true; + } + + if (nir_intrinsic_infos[intrin->intrinsic].has_dest) { + while (copy_prop_dest(&intrin->dest, instr)) + progress = true; + } return progress; } - copy_prop_state state; - state.parent_instr = instr; - state.progress = false; - nir_foreach_src(instr, copy_prop_src_cb, &state); + case nir_instr_type_phi: { + nir_phi_instr *phi = nir_instr_as_phi(instr); + assert(phi->dest.is_ssa); + unsigned num_components = phi->dest.ssa.num_components; + nir_foreach_phi_src(src, phi) { + while (copy_prop_src(&src->src, instr, NULL, num_components)) + progress = true; + } + + return progress; + } - return state.progress; + default: + return false; + } } static bool copy_prop_if(nir_if *if_stmt) { - return copy_prop_src(&if_stmt->condition, NULL, if_stmt); + return copy_prop_src(&if_stmt->condition, NULL, if_stmt, 1); } static bool |