diff options
author | Jason Ekstrand <[email protected]> | 2015-09-22 21:32:06 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2015-09-23 10:12:39 -0700 |
commit | 8dcbca59572a856ca554af9ab3f52120b6f2a929 (patch) | |
tree | 7b9d7133f641b1d1d2272c32800583047b49627c | |
parent | 65e80ce5b565953ffb07586790eeea51199ca9cf (diff) |
nir/lower_vec_to_movs: Don't emit unneeded movs
It's possible, if a vecN operation is involved in a phi node, that we
could end up moving from a register to itself. If swizzling is involved,
we still need to emit the move. However, if there is no swizzling, then the
mov is a no-op and we might as well not bother emitting it.
Shader-db results on Haswell:
total instructions in shared programs: 6262536 -> 6259558 (-0.05%)
instructions in affected programs: 184780 -> 181802 (-1.61%)
helped: 838
HURT: 0
Reviewed-by: Eduardo Lima Mitev <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/glsl/nir/nir_lower_vec_to_movs.c | 20 |
1 files changed, 19 insertions, 1 deletions
diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c index 287f2bf3d8b..c08b721dae4 100644 --- a/src/glsl/nir/nir_lower_vec_to_movs.c +++ b/src/glsl/nir/nir_lower_vec_to_movs.c @@ -83,7 +83,25 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) } } - nir_instr_insert_before(&vec->instr, &mov->instr); + /* In some situations (if the vecN is involved in a phi-web), we can end + * up with a mov from a register to itself. Some of those channels may end + * up doing nothing and there's no reason to have them as part of the mov. + */ + if (src_matches_dest_reg(&mov->dest.dest, &mov->src[0].src) && + !mov->src[0].abs && !mov->src[0].negate) { + for (unsigned i = 0; i < 4; i++) { + if (mov->src[0].swizzle[i] == i) { + mov->dest.write_mask &= ~(1 << i); + } + } + } + + /* Only emit the instruction if it actually does something */ + if (mov->dest.write_mask) { + nir_instr_insert_before(&vec->instr, &mov->instr); + } else { + ralloc_free(mov); + } return mov->dest.write_mask; } |