diff options
author | Rob Clark <[email protected]> | 2014-10-24 09:27:37 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2014-10-25 12:07:43 -0400 |
commit | 13862812dc910a4ef57cb72cb9fe777ce3c14515 (patch) | |
tree | ebbb71fa5a4460fd56958845fd53061c4eae467b | |
parent | 4dff2a642913cb9b72eccc3c290b1e5a71560156 (diff) |
freedreno/ir3: consider instruction neighbors in cp
Fanin (merge) nodes require their srcs to be "adjacent" in consecutive
scalar registers. Keep track of instruction neighbors in the copy-
propagation step and avoid eliminating mov's which would cause an
instruction to need multiple distinct left and/or right neighbors.
This lets us not fall on our face when we encounter things like:
1: MOV TEMP[2], IN[0].xyzw
2: TEX OUT[0].xy, TEMP[2], SAMP[0], SHADOW2D
3: MOV TEMP[2].xy, IN[0].yxzz
4: TEX OUT[0].zw, TEMP[2], SAMP[0], SHADOW2D
5: END
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_cp.c | 176 |
2 files changed, 178 insertions, 11 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 20d97bafdfe..8a5e9fd687c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -216,6 +216,19 @@ struct ir3_instruction { */ #define DEPTH_UNUSED ~0 unsigned depth; + + /* Used just during cp stage, which comes before depth pass. + * For fanin, where we need a sequence of consecutive registers, + * keep track of each src instructions left (ie 'n-1') and right + * (ie 'n+1') neighbor. The front-end must insert enough mov's + * to ensure that each instruction has at most one left and at + * most one right neighbor. During the copy-propagation pass, + * we only remove mov's when we can preserve this constraint. + */ + struct { + struct ir3_instruction *left, *right; + uint16_t left_cnt, right_cnt; + } cp; }; struct ir3_instruction *next; #ifdef DEBUG diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 83bcb7a742b..2076b62acb8 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -26,31 +26,101 @@ * Rob Clark <[email protected]> */ +#include "freedreno_util.h" + #include "ir3.h" /* * Copy Propagate: * - * TODO probably want some sort of visitor sort of interface to - * avoid duplicating the same graph traversal logic everywhere.. - * */ static void block_cp(struct ir3_block *block); static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, bool keep); +/* XXX move this somewhere useful (and rename?) 
*/ +static struct ir3_instruction *ssa(struct ir3_register *reg) +{ + if (reg->flags & IR3_REG_SSA) + return reg->instr; + return NULL; +} + +static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b) +{ + return (a && b) && (a != b); +} + +static void set_neighbors(struct ir3_instruction *instr, + struct ir3_instruction *left, struct ir3_instruction *right) +{ + debug_assert(!conflicts(instr->cp.left, left)); + if (left) { + instr->cp.left_cnt++; + instr->cp.left = left; + } + debug_assert(!conflicts(instr->cp.right, right)); + if (right) { + instr->cp.right_cnt++; + instr->cp.right = right; + } +} + +/* remove neighbor reference, clearing left/right neighbor ptrs when + * there are no more references: + */ +static void remove_neighbors(struct ir3_instruction *instr) +{ + if (instr->cp.left) { + if (--instr->cp.left_cnt == 0) + instr->cp.left = NULL; + } + if (instr->cp.right) { + if (--instr->cp.right_cnt == 0) + instr->cp.right = NULL; + } +} + +/* stop condition for iteration: */ +static bool check_stop(struct ir3_instruction *instr) +{ + if (ir3_instr_check_mark(instr)) + return true; + + /* stay within the block.. 
don't try to operate across + * basic block boundaries or we'll have problems when + * dealing with multiple basic blocks: + */ + if (is_meta(instr) && (instr->opc == OPC_META_INPUT)) + return true; + + return false; +} + static bool is_eligible_mov(struct ir3_instruction *instr) { if ((instr->category == 1) && (instr->cat1.src_type == instr->cat1.dst_type)) { struct ir3_register *dst = instr->regs[0]; struct ir3_register *src = instr->regs[1]; + struct ir3_instruction *src_instr = ssa(src); if (dst->flags & IR3_REG_ADDR) return false; - if ((src->flags & IR3_REG_SSA) && - /* TODO: propagate abs/neg modifiers if possible */ - !(src->flags & (IR3_REG_ABS | IR3_REG_NEGATE | IR3_REG_RELATIV))) + /* TODO: propagate abs/neg modifiers if possible */ + if (src->flags & (IR3_REG_ABS | IR3_REG_NEGATE | IR3_REG_RELATIV)) + return false; + if (src_instr) { + /* check that eliminating the move won't result in + * a neighbor conflict, ie. if an instruction feeds + * into multiple fanins it can still only have at + * most one left and one right neighbor: + */ + if (conflicts(instr->cp.left, src_instr->cp.left)) + return false; + if (conflicts(instr->cp.right, src_instr->cp.right)) + return false; return true; + } } return false; } @@ -95,6 +165,9 @@ instr_cp_fanin(struct ir3_instruction *instr) /* we can't have 2 registers referring to the same instruction, so * go through and check if any already refer to the candidate * instruction. if so, don't do the propagation. + * + * NOTE: we need to keep this, despite the neighbor + * conflict checks, to avoid A<->B<->A.. 
*/ for (j = 1; j < instr->regs_count; j++) if (instr->regs[j]->instr == cand) @@ -107,22 +180,23 @@ instr_cp_fanin(struct ir3_instruction *instr) walk_children(instr, false); return instr; - } static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, bool keep) { /* if we've already visited this instruction, bail now: */ - if (ir3_instr_check_mark(instr)) + if (check_stop(instr)) return instr; if (is_meta(instr) && (instr->opc == OPC_META_FI)) return instr_cp_fanin(instr); - if (is_eligible_mov(instr) && !keep) { - struct ir3_register *src = instr->regs[1]; - return instr_cp(src->instr, false); + if (!keep && is_eligible_mov(instr)) { + struct ir3_instruction *src_instr = ssa(instr->regs[1]); + set_neighbors(src_instr, instr->cp.left, instr->cp.right); + remove_neighbors(instr); + return instr_cp(src_instr, false); } walk_children(instr, false); @@ -159,8 +233,88 @@ static void block_cp(struct ir3_block *block) } } +/* + * Find instruction neighbors: + */ + +static void instr_find_neighbors(struct ir3_instruction *instr) +{ + unsigned i; + + if (check_stop(instr)) + return; + + if (is_meta(instr) && (instr->opc == OPC_META_FI)) { + unsigned n = instr->regs_count; + for (i = 1; i < n; i++) { + struct ir3_instruction *src_instr = ssa(instr->regs[i]); + if (src_instr) { + struct ir3_instruction *left = (i > 1) ? + ssa(instr->regs[i-1]) : NULL; + struct ir3_instruction *right = (i < (n - 1)) ? 
+ ssa(instr->regs[i+1]) : NULL; + set_neighbors(src_instr, left, right); + instr_find_neighbors(src_instr); + } + } + } else { + for (i = 1; i < instr->regs_count; i++) { + struct ir3_instruction *src_instr = ssa(instr->regs[i]); + if (src_instr) + instr_find_neighbors(src_instr); + } + } +} + +static void block_find_neighbors(struct ir3_block *block) +{ + unsigned i; + + for (i = 0; i < block->noutputs; i++) { + if (block->outputs[i]) { + struct ir3_instruction *instr = block->outputs[i]; + instr_find_neighbors(instr); + } + } +} + +static void instr_clear_neighbors(struct ir3_instruction *instr) +{ + unsigned i; + + if (check_stop(instr)) + return; + + instr->cp.left_cnt = 0; + instr->cp.left = NULL; + instr->cp.right_cnt = 0; + instr->cp.right = NULL; + + for (i = 1; i < instr->regs_count; i++) { + struct ir3_instruction *src_instr = ssa(instr->regs[i]); + if (src_instr) + instr_clear_neighbors(src_instr); + } +} + +static void block_clear_neighbors(struct ir3_block *block) +{ + unsigned i; + + for (i = 0; i < block->noutputs; i++) { + if (block->outputs[i]) { + struct ir3_instruction *instr = block->outputs[i]; + instr_clear_neighbors(instr); + } + } +} + void ir3_block_cp(struct ir3_block *block) { ir3_clear_mark(block->shader); + block_clear_neighbors(block); + ir3_clear_mark(block->shader); + block_find_neighbors(block); + ir3_clear_mark(block->shader); block_cp(block); } |