summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2015-09-22 21:32:06 -0700
committerJason Ekstrand <[email protected]>2015-09-23 10:12:39 -0700
commit8dcbca59572a856ca554af9ab3f52120b6f2a929 (patch)
tree7b9d7133f641b1d1d2272c32800583047b49627c
parent65e80ce5b565953ffb07586790eeea51199ca9cf (diff)
nir/lower_vec_to_movs: Don't emit unneeded movs
It's possible that, if a vecN operation is involved in a phi node, we could end up moving from a register to itself. If swizzling is involved, we still need to emit the move. However, if there is no swizzling, then the mov is a no-op and we might as well not bother emitting it. Shader-db results on Haswell: total instructions in shared programs: 6262536 -> 6259558 (-0.05%) instructions in affected programs: 184780 -> 181802 (-1.61%) helped: 838 HURT: 0 Reviewed-by: Eduardo Lima Mitev <[email protected]> Reviewed-by: Matt Turner <[email protected]>
-rw-r--r--src/glsl/nir/nir_lower_vec_to_movs.c20
1 files changed, 19 insertions, 1 deletions
diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index 287f2bf3d8b..c08b721dae4 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -83,7 +83,25 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
}
}
- nir_instr_insert_before(&vec->instr, &mov->instr);
+ /* In some situations (if the vecN is involved in a phi-web), we can end
+ * up with a mov from a register to itself. Some of those channels may end
+ * up doing nothing and there's no reason to have them as part of the mov.
+ */
+ if (src_matches_dest_reg(&mov->dest.dest, &mov->src[0].src) &&
+ !mov->src[0].abs && !mov->src[0].negate) {
+ for (unsigned i = 0; i < 4; i++) {
+ if (mov->src[0].swizzle[i] == i) {
+ mov->dest.write_mask &= ~(1 << i);
+ }
+ }
+ }
+
+ /* Only emit the instruction if it actually does something */
+ if (mov->dest.write_mask) {
+ nir_instr_insert_before(&vec->instr, &mov->instr);
+ } else {
+ ralloc_free(mov);
+ }
return mov->dest.write_mask;
}