summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/freedreno
diff options
context:
space:
mode:
author Rob Clark <[email protected]> 2014-10-24 09:27:37 -0400
committer Rob Clark <[email protected]> 2014-10-25 12:07:43 -0400
commit 13862812dc910a4ef57cb72cb9fe777ce3c14515 (patch)
tree ebbb71fa5a4460fd56958845fd53061c4eae467b /src/gallium/drivers/freedreno
parent 4dff2a642913cb9b72eccc3c290b1e5a71560156 (diff)
freedreno/ir3: consider instruction neighbors in cp
Fanin (merge) nodes require its srcs to be "adjacent" in consecutive scalar registers. Keep track of instruction neighbors in copy-propagation step and avoid eliminating mov's which would cause an instruction to need multiple distinct left and/or right neighbors.

This lets us not fall on our face when we encounter things like:

  1: MOV TEMP[2], IN[0].xyzw
  2: TEX OUT[0].xy, TEMP[2], SAMP[0], SHADOW2D
  3: MOV TEMP[2].xy, IN[0].yxzz
  4: TEX OUT[0].zw, TEMP[2], SAMP[0], SHADOW2D
  5: END

Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 13
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_cp.c 176
2 files changed, 178 insertions, 11 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 20d97bafdfe..8a5e9fd687c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -216,6 +216,19 @@ struct ir3_instruction {
*/
#define DEPTH_UNUSED ~0
unsigned depth;
+
+ /* Used just during cp stage, which comes before depth pass.
+ * For fanin, where we need a sequence of consecutive registers,
+ * keep track of each src instruction's left (ie 'n-1') and right
+ * (ie 'n+1') neighbor. The front-end must insert enough mov's
+ * to ensure that each instruction has at most one left and at
+ * most one right neighbor. During the copy-propagation pass,
+ * we only remove mov's when we can preserve this constraint.
+ */
+ struct {
+ struct ir3_instruction *left, *right;
+ uint16_t left_cnt, right_cnt;
+ } cp;
};
struct ir3_instruction *next;
#ifdef DEBUG
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index 83bcb7a742b..2076b62acb8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -26,31 +26,101 @@
* Rob Clark <[email protected]>
*/
+#include "freedreno_util.h"
+
#include "ir3.h"
/*
* Copy Propagate:
*
- * TODO probably want some sort of visitor sort of interface to
- * avoid duplicating the same graph traversal logic everywhere..
- *
*/
static void block_cp(struct ir3_block *block);
static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, bool keep);
+/* XXX move this somewhere useful (and rename?) */
+static struct ir3_instruction *ssa(struct ir3_register *reg)
+{
+ if (reg->flags & IR3_REG_SSA)
+ return reg->instr;
+ return NULL;
+}
+
+static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
+{
+ return (a && b) && (a != b);
+}
+
+static void set_neighbors(struct ir3_instruction *instr,
+ struct ir3_instruction *left, struct ir3_instruction *right)
+{
+ debug_assert(!conflicts(instr->cp.left, left));
+ if (left) {
+ instr->cp.left_cnt++;
+ instr->cp.left = left;
+ }
+ debug_assert(!conflicts(instr->cp.right, right));
+ if (right) {
+ instr->cp.right_cnt++;
+ instr->cp.right = right;
+ }
+}
+
+/* remove neighbor reference, clearing left/right neighbor ptrs when
+ * there are no more references:
+ */
+static void remove_neighbors(struct ir3_instruction *instr)
+{
+ if (instr->cp.left) {
+ if (--instr->cp.left_cnt == 0)
+ instr->cp.left = NULL;
+ }
+ if (instr->cp.right) {
+ if (--instr->cp.right_cnt == 0)
+ instr->cp.right = NULL;
+ }
+}
+
+/* stop condition for iteration: */
+static bool check_stop(struct ir3_instruction *instr)
+{
+ if (ir3_instr_check_mark(instr))
+ return true;
+
+ /* stay within the block.. don't try to operate across
+ * basic block boundaries or we'll have problems when
+ * dealing with multiple basic blocks:
+ */
+ if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
+ return true;
+
+ return false;
+}
+
static bool is_eligible_mov(struct ir3_instruction *instr)
{
if ((instr->category == 1) &&
(instr->cat1.src_type == instr->cat1.dst_type)) {
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src = instr->regs[1];
+ struct ir3_instruction *src_instr = ssa(src);
if (dst->flags & IR3_REG_ADDR)
return false;
- if ((src->flags & IR3_REG_SSA) &&
- /* TODO: propagate abs/neg modifiers if possible */
- !(src->flags & (IR3_REG_ABS | IR3_REG_NEGATE | IR3_REG_RELATIV)))
+ /* TODO: propagate abs/neg modifiers if possible */
+ if (src->flags & (IR3_REG_ABS | IR3_REG_NEGATE | IR3_REG_RELATIV))
+ return false;
+ if (src_instr) {
+ /* check that eliminating the move won't result in
+ * a neighbor conflict, ie. if an instruction feeds
+ * into multiple fanins it can still only have at
+ * most one left and one right neighbor:
+ */
+ if (conflicts(instr->cp.left, src_instr->cp.left))
+ return false;
+ if (conflicts(instr->cp.right, src_instr->cp.right))
+ return false;
return true;
+ }
}
return false;
}
@@ -95,6 +165,9 @@ instr_cp_fanin(struct ir3_instruction *instr)
/* we can't have 2 registers referring to the same instruction, so
* go through and check if any already refer to the candidate
* instruction. if so, don't do the propagation.
+ *
+ * NOTE: we need to keep this, despite the neighbor
+ * conflict checks, to avoid A<->B<->A..
*/
for (j = 1; j < instr->regs_count; j++)
if (instr->regs[j]->instr == cand)
@@ -107,22 +180,23 @@ instr_cp_fanin(struct ir3_instruction *instr)
walk_children(instr, false);
return instr;
-
}
static struct ir3_instruction *
instr_cp(struct ir3_instruction *instr, bool keep)
{
/* if we've already visited this instruction, bail now: */
- if (ir3_instr_check_mark(instr))
+ if (check_stop(instr))
return instr;
if (is_meta(instr) && (instr->opc == OPC_META_FI))
return instr_cp_fanin(instr);
- if (is_eligible_mov(instr) && !keep) {
- struct ir3_register *src = instr->regs[1];
- return instr_cp(src->instr, false);
+ if (!keep && is_eligible_mov(instr)) {
+ struct ir3_instruction *src_instr = ssa(instr->regs[1]);
+ set_neighbors(src_instr, instr->cp.left, instr->cp.right);
+ remove_neighbors(instr);
+ return instr_cp(src_instr, false);
}
walk_children(instr, false);
@@ -159,8 +233,88 @@ static void block_cp(struct ir3_block *block)
}
}
+/*
+ * Find instruction neighbors:
+ */
+
+static void instr_find_neighbors(struct ir3_instruction *instr)
+{
+ unsigned i;
+
+ if (check_stop(instr))
+ return;
+
+ if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
+ unsigned n = instr->regs_count;
+ for (i = 1; i < n; i++) {
+ struct ir3_instruction *src_instr = ssa(instr->regs[i]);
+ if (src_instr) {
+ struct ir3_instruction *left = (i > 1) ?
+ ssa(instr->regs[i-1]) : NULL;
+ struct ir3_instruction *right = (i < (n - 1)) ?
+ ssa(instr->regs[i+1]) : NULL;
+ set_neighbors(src_instr, left, right);
+ instr_find_neighbors(src_instr);
+ }
+ }
+ } else {
+ for (i = 1; i < instr->regs_count; i++) {
+ struct ir3_instruction *src_instr = ssa(instr->regs[i]);
+ if (src_instr)
+ instr_find_neighbors(src_instr);
+ }
+ }
+}
+
+static void block_find_neighbors(struct ir3_block *block)
+{
+ unsigned i;
+
+ for (i = 0; i < block->noutputs; i++) {
+ if (block->outputs[i]) {
+ struct ir3_instruction *instr = block->outputs[i];
+ instr_find_neighbors(instr);
+ }
+ }
+}
+
+static void instr_clear_neighbors(struct ir3_instruction *instr)
+{
+ unsigned i;
+
+ if (check_stop(instr))
+ return;
+
+ instr->cp.left_cnt = 0;
+ instr->cp.left = NULL;
+ instr->cp.right_cnt = 0;
+ instr->cp.right = NULL;
+
+ for (i = 1; i < instr->regs_count; i++) {
+ struct ir3_instruction *src_instr = ssa(instr->regs[i]);
+ if (src_instr)
+ instr_clear_neighbors(src_instr);
+ }
+}
+
+static void block_clear_neighbors(struct ir3_block *block)
+{
+ unsigned i;
+
+ for (i = 0; i < block->noutputs; i++) {
+ if (block->outputs[i]) {
+ struct ir3_instruction *instr = block->outputs[i];
+ instr_clear_neighbors(instr);
+ }
+ }
+}
+
void ir3_block_cp(struct ir3_block *block)
{
ir3_clear_mark(block->shader);
+ block_clear_neighbors(block);
+ ir3_clear_mark(block->shader);
+ block_find_neighbors(block);
+ ir3_clear_mark(block->shader);
block_cp(block);
}