aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2017-03-02 17:10:24 -0800
committerJason Ekstrand <[email protected]>2017-03-14 07:36:20 -0700
commit3c312be7b3e95ec7540e98abed1b6f3cc8d31b2a (patch)
tree77461dca97a8af61c35a4c0b9200ad6fbbf9d977
parent60d1aac28a1f44ac166e72262e378e063155d6fd (diff)
nir/copy_prop: Respect the source's number of components
In the near future we are going to require that the num_components in a src dereference match the num_components of the SSA value being dereferenced. To do that, we need copy_prop to not remove our MOVs from a larger SSA value into an instruction that uses fewer channels. Because we suddenly have to know how many components each source has, this makes the pass a bit more complicated. Fortunately, copy propagation is the only pass that cares about the number of components are read by any given source so it's fairly contained. Shader-db results on Sky Lake: total instructions in shared programs: 13318947 -> 13320265 (0.01%) instructions in affected programs: 260633 -> 261951 (0.51%) helped: 324 HURT: 1027 Looking through the hurt programs, about a dozen are hurt by 3 instructions and the rest are all hurt by 2 instructions. From a spot-check of the shaders, the story is always the same: They get a vec4 from somewhere (frequently an input) and use the first two or three components as a texture coordinate. Because of the vector component mismatch, we have a mov or, more likely, a vecN sitting between the texture instruction and the input. This means that the back-end inserts a bunch of MOVs and split_virtual_grfs() goes to town. Because the texture coordinate is also used by some other calculation, register coalesce can't combine them back together and we end up with an extra 2 MOV instructions in our shader. Reviewed-by: Eric Anholt <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Reviewed-by: Connor Abbott <[email protected]>
-rw-r--r--src/compiler/nir/nir_opt_copy_propagate.c129
1 files changed, 96 insertions, 33 deletions
diff --git a/src/compiler/nir/nir_opt_copy_propagate.c b/src/compiler/nir/nir_opt_copy_propagate.c
index c26e07fda71..c4001fa73f5 100644
--- a/src/compiler/nir/nir_opt_copy_propagate.c
+++ b/src/compiler/nir/nir_opt_copy_propagate.c
@@ -99,11 +99,12 @@ is_swizzleless_move(nir_alu_instr *instr)
}
static bool
-copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
+copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if,
+ unsigned num_components)
{
if (!src->is_ssa) {
if (src->reg.indirect)
- return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
+ return copy_prop_src(src->reg.indirect, parent_instr, parent_if, 1);
return false;
}
@@ -115,17 +116,8 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
if (!is_swizzleless_move(alu_instr))
return false;
- /* Don't let copy propagation land us with a phi that has more
- * components in its source than it has in its destination. That badly
- * messes up out-of-ssa.
- */
- if (parent_instr && parent_instr->type == nir_instr_type_phi) {
- nir_phi_instr *phi = nir_instr_as_phi(parent_instr);
- assert(phi->dest.is_ssa);
- if (phi->dest.ssa.num_components !=
- alu_instr->src[0].src.ssa->num_components)
- return false;
- }
+ if (alu_instr->src[0].src.ssa->num_components != num_components)
+ return false;
if (parent_instr) {
nir_instr_rewrite_src(parent_instr, src,
@@ -146,7 +138,7 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
if (!src->src.is_ssa) {
if (src->src.reg.indirect)
return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr,
- NULL);
+ NULL, 1);
return false;
}
@@ -193,51 +185,122 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
return true;
}
-typedef struct {
- nir_instr *parent_instr;
- bool progress;
-} copy_prop_state;
+static bool
+copy_prop_dest(nir_dest *dest, nir_instr *instr)
+{
+ if (!dest->is_ssa && dest->reg.indirect)
+ return copy_prop_src(dest->reg.indirect, instr, NULL, 1);
+
+ return false;
+}
static bool
-copy_prop_src_cb(nir_src *src, void *_state)
+copy_prop_deref_var(nir_instr *instr, nir_deref_var *deref_var)
{
- copy_prop_state *state = (copy_prop_state *) _state;
- while (copy_prop_src(src, state->parent_instr, NULL))
- state->progress = true;
+ if (!deref_var)
+ return false;
- return true;
+ bool progress = false;
+ for (nir_deref *deref = deref_var->deref.child;
+ deref; deref = deref->child) {
+ if (deref->deref_type != nir_deref_type_array)
+ continue;
+
+ nir_deref_array *arr = nir_deref_as_array(deref);
+ if (arr->deref_array_type != nir_deref_array_type_indirect)
+ continue;
+
+ while (copy_prop_src(&arr->indirect, instr, NULL, 1))
+ progress = true;
+ }
+ return progress;
}
static bool
copy_prop_instr(nir_instr *instr)
{
- if (instr->type == nir_instr_type_alu) {
+ bool progress = false;
+ switch (instr->type) {
+ case nir_instr_type_alu: {
nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
- bool progress = false;
for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++)
while (copy_prop_alu_src(alu_instr, i))
progress = true;
- if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect)
- while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL))
+ while (copy_prop_dest(&alu_instr->dest.dest, instr))
+ progress = true;
+
+ return progress;
+ }
+
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ unsigned num_components = nir_tex_instr_src_size(tex, i);
+ while (copy_prop_src(&tex->src[i].src, instr, NULL, num_components))
progress = true;
+ }
+
+ if (copy_prop_deref_var(instr, tex->texture))
+ progress = true;
+ if (copy_prop_deref_var(instr, tex->sampler))
+ progress = true;
+
+ while (copy_prop_dest(&tex->dest, instr))
+ progress = true;
+
+ return progress;
+ }
+
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ for (unsigned i = 0;
+ i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
+ unsigned num_components =
+ nir_intrinsic_infos[intrin->intrinsic].src_components[i];
+ if (!num_components)
+ num_components = intrin->num_components;
+
+ while (copy_prop_src(&intrin->src[i], instr, NULL, num_components))
+ progress = true;
+ }
+
+ for (unsigned i = 0;
+ i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {
+ if (copy_prop_deref_var(instr, intrin->variables[i]))
+ progress = true;
+ }
+
+ if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
+ while (copy_prop_dest(&intrin->dest, instr))
+ progress = true;
+ }
return progress;
}
- copy_prop_state state;
- state.parent_instr = instr;
- state.progress = false;
- nir_foreach_src(instr, copy_prop_src_cb, &state);
+ case nir_instr_type_phi: {
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ assert(phi->dest.is_ssa);
+ unsigned num_components = phi->dest.ssa.num_components;
+ nir_foreach_phi_src(src, phi) {
+ while (copy_prop_src(&src->src, instr, NULL, num_components))
+ progress = true;
+ }
+
+ return progress;
+ }
- return state.progress;
+ default:
+ return false;
+ }
}
static bool
copy_prop_if(nir_if *if_stmt)
{
- return copy_prop_src(&if_stmt->condition, NULL, if_stmt);
+ return copy_prop_src(&if_stmt->condition, NULL, if_stmt, 1);
}
static bool