summaryrefslogtreecommitdiffstats
path: root/src/glsl/nir
diff options
context:
space:
mode:
Diffstat (limited to 'src/glsl/nir')
-rw-r--r-- src/glsl/nir/nir_lower_vec_to_movs.c 85
1 files changed, 85 insertions, 0 deletions
diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index 29dd0ca7235..9ff86ea7543 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -79,6 +79,88 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
return mov->dest.write_mask;
}
+/* Attempts to coalesce the "move" from the given source of the vec to the
+ * destination of the instruction generating the value. If, for whatever
+ * reason, we cannot coalesce the move, it does nothing and returns 0 so the
+ * caller can call insert_mov as normal; else returns the channels handled.
+ */
+static unsigned
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+{
+ assert(start_idx < nir_op_infos[vec->op].num_inputs);
+
+ /* We will only even try if the source is an SSA value */
+ if (!vec->src[start_idx].src.is_ssa)
+ return 0;
+
+ assert(vec->src[start_idx].src.ssa);
+
+ /* If we are going to do a reswizzle, then the vecN operation must be the
+ * only use of the source value. We also can't have any source modifiers
+ * (abs/negate) on any of those uses.
+ */
+ nir_foreach_use(vec->src[start_idx].src.ssa, src) {
+ if (src->parent_instr != &vec->instr)
+ return 0;
+
+ nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
+ if (alu_src->abs || alu_src->negate)
+ return 0;
+ }
+
+ if (!list_empty(&vec->src[start_idx].src.ssa->if_uses))
+ return 0;
+
+ if (vec->src[start_idx].src.ssa->parent_instr->type != nir_instr_type_alu)
+ return 0;
+
+ nir_alu_instr *src_alu =
+ nir_instr_as_alu(vec->src[start_idx].src.ssa->parent_instr);
+
+ /* We can only retarget the producing instruction if it is something that
+ * we can reswizzle. Its output must be per-component (output_size of 0).
+ */
+ if (nir_op_infos[src_alu->op].output_size != 0)
+ return 0;
+
+ /* If we are going to reswizzle the instruction, we can't have any
+ * non-per-component sources either (every input_sizes entry must be 0).
+ */
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
+ return 0;
+
+ /* Stash off the ALU instruction's original swizzles; they get overwritten. */
+ uint8_t swizzles[4][4];
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ for (unsigned i = 0; i < 4; i++)
+ swizzles[j][i] = src_alu->src[j].swizzle[i];
+
+ unsigned write_mask = 0;
+ for (unsigned i = start_idx; i < 4; i++) {
+ if (!(vec->dest.write_mask & (1 << i)))
+ continue;
+
+ if (!vec->src[i].src.is_ssa ||
+ vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
+ continue;
+
+ /* At this point, the given vec source matches up with the ALU
+ * instruction so we can re-swizzle that component to match.
+ */
+ write_mask |= 1 << i;
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
+
+ /* Clear the no longer needed vec source */
+ nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, NIR_SRC_INIT);
+ }
+
+ nir_instr_rewrite_dest(&src_alu->instr, &src_alu->dest.dest, vec->dest.dest);
+ src_alu->dest.write_mask = write_mask;
+
+ return write_mask;
+}
+
static bool
lower_vec_to_movs_block(nir_block *block, void *void_impl)
{
@@ -133,6 +215,9 @@ lower_vec_to_movs_block(nir_block *block, void *void_impl)
continue;
if (!(finished_write_mask & (1 << i)))
+ finished_write_mask |= try_coalesce(vec, i, shader);
+
+ if (!(finished_write_mask & (1 << i)))
finished_write_mask |= insert_mov(vec, i, shader);
}