summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2015-03-29 11:24:57 -0400
committerRob Clark <[email protected]>2015-04-05 16:36:34 -0400
commitf370e95421f553ace931a02743c96be80fd62dc8 (patch)
treeeedcee138a10be5616b80f5883d421523aa92025 /src/gallium
parent104713d9f2dced94a427004a25c54b2c7feee166 (diff)
freedreno/ir3: handle const/immed/abs/neg in cp
Be smarter about propagating copies from const or immed, or with abs/neg modifiers. Also, realize that absneg.s and absneg.f are really "fancy" mov instructions. This opens up the possibility to remove more copies. It helps the TGSI frontend a bit, but will be really needed for the NIR f/e which builds everything up in SSA form (ie. will *always* insert a mov from const or immediate). Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.h6
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_cp.c333
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_group.c6
3 files changed, 314 insertions, 31 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 8310286102e..f424f73bec3 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -525,6 +525,12 @@ static inline struct ir3_instruction *ssa(struct ir3_register *reg)
return NULL;
}
+static inline bool conflicts(struct ir3_instruction *a,
+ struct ir3_instruction *b)
+{
+ return (a && b) && (a != b);
+}
+
static inline bool reg_gpr(struct ir3_register *r)
{
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index ceebf803ee1..3eb85f660e2 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -32,29 +32,47 @@
/*
* Copy Propagate:
- *
- * We could eventually drop this, if the front-end did not insert any
- * mov's.. For now keeping it as a separate pass since that is less
- * painful than updating the existing frontend. It is expected that
- * with an eventual new NIR based frontend that we won't need this.
*/
-static void block_cp(struct ir3_block *block);
-static struct ir3_instruction * instr_cp(struct ir3_instruction *instr);
-static bool is_eligible_mov(struct ir3_instruction *instr)
+/* Is it a non-transformative (ie. not type changing) mov? This can
+ * also include absneg.s/absneg.f, which for the most part can be
+ * treated as a mov (single src argument).
+ */
+static bool is_same_type_mov(struct ir3_instruction *instr)
{
+ struct ir3_register *dst = instr->regs[0];
+
+ /* mov's that write to a0.x or p0.x are special: */
+ if (dst->num == regid(REG_P0, 0))
+ return false;
+ if (dst->num == regid(REG_A0, 0))
+ return false;
+
if ((instr->category == 1) &&
- (instr->cat1.src_type == instr->cat1.dst_type)) {
+ (instr->cat1.src_type == instr->cat1.dst_type))
+ return true;
+ if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) ||
+ (instr->opc == OPC_ABSNEG_S)))
+ return true;
+ return false;
+}
+
+/* is it a type preserving mov, with ok flags? */
+static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
+{
+ if (is_same_type_mov(instr)) {
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src = instr->regs[1];
struct ir3_instruction *src_instr = ssa(src);
if (dst->flags & (IR3_REG_ADDR | IR3_REG_RELATIV))
return false;
- /* TODO: propagate abs/neg modifiers if possible */
- if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
- IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_RELATIV))
+ if (src->flags & IR3_REG_RELATIV)
return false;
+ if (!allow_flags)
+ if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
+ IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
+ return false;
if (!src_instr)
return false;
/* TODO: remove this hack: */
@@ -65,10 +83,270 @@ static bool is_eligible_mov(struct ir3_instruction *instr)
return false;
}
+static unsigned cp_flags(unsigned flags)
+{
+ /* only considering these flags (at least for now): */
+ flags &= (IR3_REG_CONST | IR3_REG_IMMED |
+ IR3_REG_FNEG | IR3_REG_FABS |
+ IR3_REG_SNEG | IR3_REG_SABS |
+ IR3_REG_BNOT);
+ return flags;
+}
+
+static bool valid_flags(struct ir3_instruction *instr, unsigned n,
+ unsigned flags)
+{
+ unsigned valid_flags;
+ flags = cp_flags(flags);
+
+ /* clear flags that are 'ok' */
+ switch (instr->category) {
+ case 1:
+ case 5:
+ /* no flags allowed */
+ if (flags)
+ return false;
+ break;
+ case 6:
+ valid_flags = IR3_REG_IMMED;
+ if (flags & ~valid_flags)
+ return false;
+ break;
+ case 2:
+ valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST;
+
+ if (ir3_cat2_immed(instr->opc))
+ valid_flags |= IR3_REG_IMMED;
+
+ if (flags & ~valid_flags)
+ return false;
+
+ if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
+ unsigned m = (n ^ 1) + 1;
+ /* cannot deal w/ const in both srcs:
+ * (note that some cat2 actually only have a single src)
+ */
+ if (m < instr->regs_count) {
+ struct ir3_register *reg = instr->regs[m];
+ if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
+ return false;
+ if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
+ return false;
+ }
+ /* cannot be const + ABS|NEG: */
+ if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
+ IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
+ return false;
+ }
+ break;
+ case 3:
+ valid_flags = ir3_cat3_absneg(instr->opc) | IR3_REG_CONST;
+
+ if (flags & ~valid_flags)
+ return false;
+
+ if (flags & IR3_REG_CONST) {
+ /* cannot deal w/ const in 2nd src: */
+ /* TODO in some common cases, like mad, we can swap
+ * first two args.. possibly we should allow that here
+ * and fixup in legalize?
+ */
+ if (n == 1)
+ return false;
+ /* cannot be const + ABS|NEG: */
+ if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
+ IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
+ return false;
+ }
+ break;
+ case 4:
+ /* seems like blob compiler avoids const as src.. */
+ /* TODO double check if this is still the case on a4xx */
+ if (flags & IR3_REG_CONST)
+ return false;
+ if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+/* propagate register flags from src to dst.. negates need special
+ * handling to cancel each other out.
+ */
+static void combine_flags(unsigned *dstflags, unsigned srcflags)
+{
+ /* if what we are combining into already has (abs) flags,
+ * we can drop (neg) from src:
+ */
+ if (*dstflags & IR3_REG_FABS)
+ srcflags &= ~IR3_REG_FNEG;
+ if (*dstflags & IR3_REG_SABS)
+ srcflags &= ~IR3_REG_SNEG;
+
+ if (srcflags & IR3_REG_FABS)
+ *dstflags |= IR3_REG_FABS;
+ if (srcflags & IR3_REG_SABS)
+ *dstflags |= IR3_REG_SABS;
+ if (srcflags & IR3_REG_FNEG)
+ *dstflags ^= IR3_REG_FNEG;
+ if (srcflags & IR3_REG_SNEG)
+ *dstflags ^= IR3_REG_SNEG;
+ if (srcflags & IR3_REG_BNOT)
+ *dstflags ^= IR3_REG_BNOT;
+}
+
+static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags);
+/**
+ * Handle cp for a given src register. This additionally handles
+ * the cases of collapsing immedate/const (which replace the src
+ * register with a non-ssa src) or collapsing mov's from relative
+ * src (which needs to also fixup the address src reference by the
+ * instruction).
+ */
+static void
+reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
+{
+ unsigned src_flags = 0, new_flags;
+ struct ir3_instruction *src_instr;
+
+ if (is_meta(instr)) {
+ /* meta instructions cannot fold up register
+ * flags.. they are usually src for texture
+ * fetch, etc, where we cannot specify abs/neg
+ */
+ reg->instr = instr_cp(reg->instr, NULL);
+ return;
+ }
+
+ src_instr = instr_cp(reg->instr, &src_flags);
+
+ new_flags = reg->flags;
+ combine_flags(&new_flags, src_flags);
+
+ reg->flags = new_flags;
+ reg->instr = src_instr;
+
+ if (!valid_flags(instr, n, reg->flags)) {
+ /* insert an absneg.f */
+ if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS)) {
+ debug_assert(!(reg->flags & (IR3_REG_FNEG | IR3_REG_FABS)));
+ reg->instr = ir3_ABSNEG_S(instr->block,
+ reg->instr, cp_flags(src_flags));
+ } else {
+ debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS)));
+ reg->instr = ir3_ABSNEG_F(instr->block,
+ reg->instr, cp_flags(src_flags));
+ }
+ reg->flags &= ~cp_flags(src_flags);
+ /* send it through instr_cp() again since
+ * the absneg src might be a mov from const
+ * that could be cleaned up:
+ */
+ reg->instr = instr_cp(reg->instr, NULL);
+ return;
+ }
+
+ if (is_same_type_mov(reg->instr)) {
+ struct ir3_register *src_reg = reg->instr->regs[1];
+ unsigned new_flags = src_reg->flags;
+
+ combine_flags(&new_flags, reg->flags);
+
+ if (!valid_flags(instr, n, new_flags))
+ return;
+
+ /* Here we handle the special case of mov from
+ * CONST and/or RELATIV. These need to be handled
+ * specially, because in the case of move from CONST
+ * there is no src ir3_instruction so we need to
+ * replace the ir3_register. And in the case of
+ * RELATIV we need to handle the address register
+ * dependency.
+ */
+ if (src_reg->flags & IR3_REG_CONST) {
+ /* an instruction cannot reference two different
+ * address registers:
+ */
+ if ((src_reg->flags & IR3_REG_RELATIV) &&
+ conflicts(instr->address, reg->instr->address))
+ return;
+
+ src_reg->flags = new_flags;
+ instr->regs[n+1] = src_reg;
+
+ if (src_reg->flags & IR3_REG_RELATIV)
+ instr->address = reg->instr->address;
+
+ return;
+ }
+
+ if ((src_reg->flags & IR3_REG_RELATIV) &&
+ !conflicts(instr->address, reg->instr->address)) {
+ src_reg->flags = new_flags;
+ instr->regs[n+1] = src_reg;
+ instr->address = reg->instr->address;
+
+ return;
+ }
+
+ /* NOTE: seems we can only do immed integers, so don't
+ * need to care about float. But we do need to handle
+ * abs/neg *before* checking that the immediate requires
+ * few enough bits to encode:
+ *
+ * TODO: do we need to do something to avoid accidentally
+ * catching a float immed?
+ */
+ if (src_reg->flags & IR3_REG_IMMED) {
+ int32_t iim_val = src_reg->iim_val;
+
+ debug_assert((instr->category == 6) ||
+ ((instr->category == 2) &&
+ ir3_cat2_immed(instr->opc)));
+
+ if (new_flags & IR3_REG_SABS)
+ iim_val = abs(iim_val);
+
+ if (new_flags & IR3_REG_SNEG)
+ iim_val = -iim_val;
+
+ if (new_flags & IR3_REG_BNOT)
+ iim_val = ~iim_val;
+
+ if (!(iim_val & ~0x3ff)) {
+ new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
+ src_reg->flags = new_flags;
+ src_reg->iim_val = iim_val;
+ instr->regs[n+1] = src_reg;
+ }
+
+ return;
+ }
+ }
+}
+
+/**
+ * Given an SSA src (instruction), return the one with extraneous
+ * mov's removed, ie, for (to copy NIR syntax):
+ *
+ * vec1 ssa1 = fadd <something>, <somethingelse>
+ * vec1 ssa2 = fabs ssa1
+ * vec1 ssa3 = fneg ssa1
+ *
+ * then calling instr_cp(ssa3, &flags) would return ssa1 with
+ * (IR3_REG_ABS | IR3_REG_NEGATE) in flags. If flags is NULL,
+ * then disallow eliminating copies which would require flag
+ * propagation (for example, we cannot propagate abs/neg into
+ * an output).
+ */
static struct ir3_instruction *
-instr_cp(struct ir3_instruction *instr)
+instr_cp(struct ir3_instruction *instr, unsigned *flags)
{
+ struct ir3_register *reg;
+
/* stay within the block.. don't try to operate across
* basic block boundaries or we'll have problems when
* dealing with multiple basic blocks:
@@ -76,9 +354,12 @@ instr_cp(struct ir3_instruction *instr)
if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
return instr;
- if (is_eligible_mov(instr)) {
- struct ir3_instruction *src_instr = ssa(instr->regs[1]);
- return instr_cp(src_instr);
+ if (is_eligible_mov(instr, !!flags)) {
+ struct ir3_register *reg = instr->regs[1];
+ struct ir3_instruction *src_instr = ssa(reg);
+ if (flags)
+ combine_flags(flags, reg->flags);
+ return instr_cp(src_instr, flags);
}
/* Check termination condition before walking children (rather
@@ -89,18 +370,20 @@ instr_cp(struct ir3_instruction *instr)
* up as a src, we only need to recursively walk the children
* once.)
*/
- if (!ir3_instr_check_mark(instr)) {
- struct ir3_register *reg;
+ if (ir3_instr_check_mark(instr))
+ return instr;
- /* walk down the graph from each src: */
- foreach_src(reg, instr)
- if (reg->flags & IR3_REG_SSA)
- reg->instr = instr_cp(reg->instr);
+ /* walk down the graph from each src: */
+ foreach_src_n(reg, n, instr) {
+ if (!(reg->flags & IR3_REG_SSA))
+ continue;
- if (instr->address)
- instr->address = instr_cp(instr->address);
+ reg_cp(instr, reg, n);
}
+ if (instr->address)
+ instr->address = instr_cp(instr->address, NULL);
+
return instr;
}
@@ -111,7 +394,7 @@ static void block_cp(struct ir3_block *block)
for (i = 0; i < block->noutputs; i++) {
if (block->outputs[i]) {
struct ir3_instruction *out =
- instr_cp(block->outputs[i]);
+ instr_cp(block->outputs[i], NULL);
block->outputs[i] = out;
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c
index d48ecc307aa..782f6e87e56 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_group.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c
@@ -118,12 +118,6 @@ static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
static struct group_ops instr_ops = { instr_get, instr_insert_mov };
-
-static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
-{
- return (a && b) && (a != b);
-}
-
static void group_n(struct group_ops *ops, void *arr, unsigned n)
{
unsigned i, j;