summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/freedreno/Makefile.sources 4
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 76
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 8
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler.c 72
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_cp.c 259
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_group.c 228
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_ra.c 598
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_visitor.h 154
8 files changed, 622 insertions, 777 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index 1cae52905ef..592f4b4a3fa 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -127,10 +127,10 @@ ir3_SOURCES := \
ir3/ir3_depth.c \
ir3/ir3_dump.c \
ir3/ir3_flatten.c \
+ ir3/ir3_group.c \
ir3/ir3.h \
ir3/ir3_legalize.c \
ir3/ir3_ra.c \
ir3/ir3_sched.c \
ir3/ir3_shader.c \
- ir3/ir3_shader.h \
- ir3/ir3_visitor.h
+ ir3/ir3_shader.h
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index bd0c0a5b693..aaa0ff6efa8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -228,26 +228,62 @@ struct ir3_instruction {
*/
#define DEPTH_UNUSED ~0
unsigned depth;
-
- /* Used just during cp stage, which comes before depth pass.
- * For fanin, where we need a sequence of consecutive registers,
- * keep track of each src instructions left (ie 'n-1') and right
- * (ie 'n+1') neighbor. The front-end must insert enough mov's
- * to ensure that each instruction has at most one left and at
- * most one right neighbor. During the copy-propagation pass,
- * we only remove mov's when we can preserve this constraint.
- */
- struct {
- struct ir3_instruction *left, *right;
- uint16_t left_cnt, right_cnt;
- } cp;
};
+
+ /* Used during CP and RA stages. For fanin and shader inputs/
+ * outputs where we need a sequence of consecutive registers,
+ * keep track of each src instruction's left (ie 'n-1') and right
+ * (ie 'n+1') neighbor. The front-end must insert enough mov's
+ * to ensure that each instruction has at most one left and at
+ * most one right neighbor. During the copy-propagation pass,
+ * we only remove mov's when we can preserve this constraint.
+ * And during the RA stage, we use the neighbor information to
+ * allocate a block of registers in one shot.
+ *
+ * TODO: maybe just add something like:
+ * struct ir3_instruction_ref {
+ * struct ir3_instruction *instr;
+ * unsigned cnt;
+ * }
+ *
+ * Or can we get away without the refcnt stuff? It seems like
+ * it should be overkill.. the problem is that, potentially after
+ * already eliminating some mov's, you can have a single mov that
+ * needs to be grouped with its neighbors in two different
+ * places (ex. shader output and a fanin).
+ */
+ struct {
+ struct ir3_instruction *left, *right;
+ uint16_t left_cnt, right_cnt;
+ } cp;
struct ir3_instruction *next;
#ifdef DEBUG
uint32_t serialno;
#endif
};
+static inline struct ir3_instruction *
+ir3_neighbor_first(struct ir3_instruction *instr)
+{
+ while (instr->cp.left)
+ instr = instr->cp.left;
+ return instr;
+}
+
+static inline int ir3_neighbor_count(struct ir3_instruction *instr)
+{
+ int num = 1;
+
+ debug_assert(!instr->cp.left);
+
+ while (instr->cp.right) {
+ num++;
+ instr = instr->cp.right;
+ }
+
+ return num;
+}
+
struct ir3_heap_chunk;
struct ir3 {
@@ -415,6 +451,15 @@ static inline bool writes_pred(struct ir3_instruction *instr)
return false;
}
+/* returns defining instruction for reg */
+/* TODO better name */
+static inline struct ir3_instruction *ssa(struct ir3_register *reg)
+{
+ if (reg->flags & IR3_REG_SSA)
+ return reg->instr;
+ return NULL;
+}
+
static inline bool reg_gpr(struct ir3_register *r)
{
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_ADDR))
@@ -443,12 +488,15 @@ void ir3_block_depth(struct ir3_block *block);
/* copy-propagate: */
void ir3_block_cp(struct ir3_block *block);
+/* group neighbors and insert mov's to resolve conflicts: */
+void ir3_block_group(struct ir3_block *block);
+
/* scheduling: */
int ir3_block_sched(struct ir3_block *block);
/* register assignment: */
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
- bool half_precision, bool frag_coord, bool frag_face);
+ bool frag_coord, bool frag_face);
/* legalize: */
void ir3_block_legalize(struct ir3_block *block,
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index 081143d5d6e..6c334d200a3 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -61,8 +61,10 @@ static void dump_info(struct ir3_shader_variant *so, const char *str)
if (block) {
for (i = 0; i < block->ninputs; i++) {
- if (!block->inputs[i])
+ if (!block->inputs[i]) {
+ debug_printf("; in%d unused\n", i);
continue;
+ }
reg = block->inputs[i]->regs[0];
regid = reg->num;
debug_printf("@in(%sr%d.%c)\tin%d\n",
@@ -71,8 +73,10 @@ if (block) {
}
for (i = 0; i < block->noutputs; i++) {
- if (!block->outputs[i])
+ if (!block->outputs[i]) {
+ debug_printf("; out%d unused\n", i);
continue;
+ }
/* kill shows up as a virtual output.. skip it! */
if (is_kill(block->outputs[i]))
continue;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index b47aa1d14d8..209621bd013 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -571,23 +571,40 @@ add_dst_reg_wrmask(struct ir3_compile_context *ctx,
} else if ((dst->File == TGSI_FILE_TEMPORARY) ||
(dst->File == TGSI_FILE_OUTPUT) ||
(dst->File == TGSI_FILE_ADDRESS)) {
+ struct ir3_instruction *prev = NULL;
unsigned i;
/* if instruction writes multiple, we need to create
* some place-holder collect the registers:
*/
for (i = 0; i < 4; i++) {
- if (wrmask & (1 << i)) {
- struct ir3_instruction *collect =
- ir3_instr_create(ctx->block, -1, OPC_META_FO);
- collect->fo.off = i;
- /* unused dst reg: */
- ir3_reg_create(collect, 0, 0);
- /* and src reg used to hold original instr */
- ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = instr;
- if (!ctx->atomic)
- ssa_dst(ctx, collect, dst, chan+i);
+ /* NOTE: slightly ugly that we setup neighbor ptrs
+ * for FO here, but handle FI in CP pass.. we should
+ * probably just always setup neighbor ptrs in the
+ * frontend?
+ */
+ struct ir3_instruction *split =
+ ir3_instr_create(ctx->block, -1, OPC_META_FO);
+ split->fo.off = i;
+ /* unused dst reg: */
+ /* NOTE: set SSA flag on dst here, because unused FO's
+ * which don't get scheduled will end up not in the
+ * instruction list when RA sets SSA flag on each dst.
+ * Slight hack. We really should set SSA flag on
+ * every dst register in the frontend.
+ */
+ ir3_reg_create(split, 0, IR3_REG_SSA);
+ /* and src reg used to hold original instr */
+ ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr;
+ if (prev) {
+ split->cp.left = prev;
+ split->cp.left_cnt++;
+ prev->cp.right = split;
+ prev->cp.right_cnt++;
}
+ if ((wrmask & (1 << i)) && !ctx->atomic)
+ ssa_dst(ctx, split, dst, chan+i);
+ prev = split;
}
}
@@ -3120,6 +3137,17 @@ ir3_compile_shader(struct ir3_shader_variant *so,
}
}
+ /* if we want half-precision outputs, mark the output registers
+ * as half:
+ */
+ if (key.half_precision) {
+ for (i = 0; i < block->noutputs; i++) {
+ if (!block->outputs[i])
+ continue;
+ block->outputs[i]->regs[0]->flags |= IR3_REG_HALF;
+ }
+ }
+
/* at this point, we want the kill's in the outputs array too,
* so that they get scheduled (since they have no dst).. we've
* already ensured that the array is big enough in push_block():
@@ -3145,9 +3173,26 @@ ir3_compile_shader(struct ir3_shader_variant *so,
ir3_dump_instr_list(block->head);
}
+ ir3_block_depth(block);
+
+ /* First remove all the extra mov's (which we could skip if the
+ * front-end was clever enough not to insert them in the first
+ * place). Then figure out left/right neighbors, re-inserting
+ * extra mov's when needed to avoid conflicts.
+ */
if (cp && !(fd_mesa_debug & FD_DBG_NOCP))
ir3_block_cp(block);
+ if (fd_mesa_debug & FD_DBG_OPTMSGS) {
+ printf("BEFORE GROUPING:\n");
+ ir3_dump_instr_list(block->head);
+ }
+
+ /* Group left/right neighbors, inserting mov's where needed to
+ * solve conflicts:
+ */
+ ir3_block_group(block);
+
if (fd_mesa_debug & FD_DBG_OPTDUMP)
compile_dump(&ctx);
@@ -3169,20 +3214,19 @@ ir3_compile_shader(struct ir3_shader_variant *so,
ir3_dump_instr_list(block->head);
}
- ret = ir3_block_ra(block, so->type, key.half_precision,
- so->frag_coord, so->frag_face);
+ ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face);
if (ret) {
DBG("RA failed!");
goto out;
}
- ir3_block_legalize(block, &so->has_samp, &max_bary);
-
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER RA:\n");
ir3_dump_instr_list(block->head);
}
+ ir3_block_legalize(block, &so->has_samp, &max_bary);
+
/* fixup input/outputs: */
for (i = 0; i < so->outputs_count; i++) {
so->outputs[i].regid = block->outputs[i*4]->regs[0]->num;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index 2076b62acb8..c55425d68d4 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -33,69 +33,14 @@
/*
* Copy Propagate:
*
+ * We could eventually drop this, if the front-end did not insert any
+ * mov's.. For now keeping it as a separate pass since that is less
+ * painful than updating the existing frontend. It is expected that
+ * with an eventual new NIR based frontend that we won't need this.
*/
static void block_cp(struct ir3_block *block);
-static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, bool keep);
-
-/* XXX move this somewhere useful (and rename?) */
-static struct ir3_instruction *ssa(struct ir3_register *reg)
-{
- if (reg->flags & IR3_REG_SSA)
- return reg->instr;
- return NULL;
-}
-
-static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
-{
- return (a && b) && (a != b);
-}
-
-static void set_neighbors(struct ir3_instruction *instr,
- struct ir3_instruction *left, struct ir3_instruction *right)
-{
- debug_assert(!conflicts(instr->cp.left, left));
- if (left) {
- instr->cp.left_cnt++;
- instr->cp.left = left;
- }
- debug_assert(!conflicts(instr->cp.right, right));
- if (right) {
- instr->cp.right_cnt++;
- instr->cp.right = right;
- }
-}
-
-/* remove neighbor reference, clearing left/right neighbor ptrs when
- * there are no more references:
- */
-static void remove_neighbors(struct ir3_instruction *instr)
-{
- if (instr->cp.left) {
- if (--instr->cp.left_cnt == 0)
- instr->cp.left = NULL;
- }
- if (instr->cp.right) {
- if (--instr->cp.right_cnt == 0)
- instr->cp.right = NULL;
- }
-}
-
-/* stop condition for iteration: */
-static bool check_stop(struct ir3_instruction *instr)
-{
- if (ir3_instr_check_mark(instr))
- return true;
-
- /* stay within the block.. don't try to operate across
- * basic block boundaries or we'll have problems when
- * dealing with multiple basic blocks:
- */
- if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
- return true;
-
- return false;
-}
+static struct ir3_instruction * instr_cp(struct ir3_instruction *instr);
static bool is_eligible_mov(struct ir3_instruction *instr)
{
@@ -109,23 +54,17 @@ static bool is_eligible_mov(struct ir3_instruction *instr)
/* TODO: propagate abs/neg modifiers if possible */
if (src->flags & (IR3_REG_ABS | IR3_REG_NEGATE | IR3_REG_RELATIV))
return false;
- if (src_instr) {
- /* check that eliminating the move won't result in
- * a neighbor conflict, ie. if an instruction feeds
- * into multiple fanins it can still only have at
- * most one left and one right neighbor:
- */
- if (conflicts(instr->cp.left, src_instr->cp.left))
- return false;
- if (conflicts(instr->cp.right, src_instr->cp.right))
- return false;
- return true;
- }
+ if (!src_instr)
+ return false;
+ /* TODO: remove this hack: */
+ if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO))
+ return false;
+ return true;
}
return false;
}
-static void walk_children(struct ir3_instruction *instr, bool keep)
+static void walk_children(struct ir3_instruction *instr)
{
unsigned i;
@@ -133,188 +72,56 @@ static void walk_children(struct ir3_instruction *instr, bool keep)
for (i = 1; i < instr->regs_count; i++) {
struct ir3_register *src = instr->regs[i];
if (src->flags & IR3_REG_SSA)
- src->instr = instr_cp(src->instr, keep);
+ src->instr = instr_cp(src->instr);
}
}
-static struct ir3_instruction *
-instr_cp_fanin(struct ir3_instruction *instr)
-{
- unsigned i, j;
-
- /* we need to handle fanin specially, to detect cases
- * when we need to keep a mov
- */
-
- for (i = 1; i < instr->regs_count; i++) {
- struct ir3_register *src = instr->regs[i];
- if (src->flags & IR3_REG_SSA) {
- struct ir3_instruction *cand =
- instr_cp(src->instr, false);
-
- /* if the candidate is a fanout, then keep
- * the move.
- *
- * This is a bit, um, fragile, but it should
- * catch the extra mov's that the front-end
- * puts in for us already in these cases.
- */
- if (is_meta(cand) && (cand->opc == OPC_META_FO))
- cand = instr_cp(src->instr, true);
-
- /* we can't have 2 registers referring to the same instruction, so
- * go through and check if any already refer to the candidate
- * instruction. if so, don't do the propagation.
- *
- * NOTE: we need to keep this, despite the neighbor
- * conflict checks, to avoid A<->B<->A..
- */
- for (j = 1; j < instr->regs_count; j++)
- if (instr->regs[j]->instr == cand)
- break;
- if (j == instr->regs_count)
- src->instr = cand;
- }
- }
-
- walk_children(instr, false);
-
- return instr;
-}
static struct ir3_instruction *
-instr_cp(struct ir3_instruction *instr, bool keep)
+instr_cp(struct ir3_instruction *instr)
{
- /* if we've already visited this instruction, bail now: */
- if (check_stop(instr))
+ /* stay within the block.. don't try to operate across
+ * basic block boundaries or we'll have problems when
+ * dealing with multiple basic blocks:
+ */
+ if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
return instr;
- if (is_meta(instr) && (instr->opc == OPC_META_FI))
- return instr_cp_fanin(instr);
-
- if (!keep && is_eligible_mov(instr)) {
+ if (is_eligible_mov(instr)) {
struct ir3_instruction *src_instr = ssa(instr->regs[1]);
- set_neighbors(src_instr, instr->cp.left, instr->cp.right);
- remove_neighbors(instr);
- return instr_cp(src_instr, false);
+ return instr_cp(src_instr);
}
- walk_children(instr, false);
+ /* Check termination condition before walking children (rather
+ * than before checking eligible-mov). A mov instruction may
+ * appear as ssa-src for multiple other instructions, and we
+ * want to consider it for removal for each, rather than just
+ * the first one. (But regardless of how many places it shows
+ * up as a src, we only need to recursively walk the children
+ * once.)
+ */
+ if (!ir3_instr_check_mark(instr))
+ walk_children(instr);
return instr;
}
static void block_cp(struct ir3_block *block)
{
- unsigned i, j;
+ unsigned i;
for (i = 0; i < block->noutputs; i++) {
if (block->outputs[i]) {
struct ir3_instruction *out =
- instr_cp(block->outputs[i], false);
-
- /* To deal with things like this:
- *
- * 43: MOV OUT[2], TEMP[5]
- * 44: MOV OUT[0], TEMP[5]
- *
- * we need to ensure that no two outputs point to
- * the same instruction
- */
- for (j = 0; j < i; j++) {
- if (block->outputs[j] == out) {
- out = instr_cp(block->outputs[i], true);
- break;
- }
- }
+ instr_cp(block->outputs[i]);
block->outputs[i] = out;
}
}
}
-/*
- * Find instruction neighbors:
- */
-
-static void instr_find_neighbors(struct ir3_instruction *instr)
-{
- unsigned i;
-
- if (check_stop(instr))
- return;
-
- if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
- unsigned n = instr->regs_count;
- for (i = 1; i < n; i++) {
- struct ir3_instruction *src_instr = ssa(instr->regs[i]);
- if (src_instr) {
- struct ir3_instruction *left = (i > 1) ?
- ssa(instr->regs[i-1]) : NULL;
- struct ir3_instruction *right = (i < (n - 1)) ?
- ssa(instr->regs[i+1]) : NULL;
- set_neighbors(src_instr, left, right);
- instr_find_neighbors(src_instr);
- }
- }
- } else {
- for (i = 1; i < instr->regs_count; i++) {
- struct ir3_instruction *src_instr = ssa(instr->regs[i]);
- if (src_instr)
- instr_find_neighbors(src_instr);
- }
- }
-}
-
-static void block_find_neighbors(struct ir3_block *block)
-{
- unsigned i;
-
- for (i = 0; i < block->noutputs; i++) {
- if (block->outputs[i]) {
- struct ir3_instruction *instr = block->outputs[i];
- instr_find_neighbors(instr);
- }
- }
-}
-
-static void instr_clear_neighbors(struct ir3_instruction *instr)
-{
- unsigned i;
-
- if (check_stop(instr))
- return;
-
- instr->cp.left_cnt = 0;
- instr->cp.left = NULL;
- instr->cp.right_cnt = 0;
- instr->cp.right = NULL;
-
- for (i = 1; i < instr->regs_count; i++) {
- struct ir3_instruction *src_instr = ssa(instr->regs[i]);
- if (src_instr)
- instr_clear_neighbors(src_instr);
- }
-}
-
-static void block_clear_neighbors(struct ir3_block *block)
-{
- unsigned i;
-
- for (i = 0; i < block->noutputs; i++) {
- if (block->outputs[i]) {
- struct ir3_instruction *instr = block->outputs[i];
- instr_clear_neighbors(instr);
- }
- }
-}
-
void ir3_block_cp(struct ir3_block *block)
{
ir3_clear_mark(block->shader);
- block_clear_neighbors(block);
- ir3_clear_mark(block->shader);
- block_find_neighbors(block);
- ir3_clear_mark(block->shader);
block_cp(block);
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c
new file mode 100644
index 00000000000..f215c1c15d2
--- /dev/null
+++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c
@@ -0,0 +1,228 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <[email protected]>
+ */
+
+#include "freedreno_util.h"
+
+#include "ir3.h"
+
+/*
+ * Find/group instruction neighbors:
+ */
+
+/* stop condition for iteration: */
+static bool check_stop(struct ir3_instruction *instr)
+{
+ if (ir3_instr_check_mark(instr))
+ return true;
+
+ /* stay within the block.. don't try to operate across
+ * basic block boundaries or we'll have problems when
+ * dealing with multiple basic blocks:
+ */
+ if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
+ return true;
+
+ return false;
+}
+
+/* bleh.. we need to do the same group_n() thing for both inputs/outputs
+ * (where we have a simple instr[] array), and fanin nodes (where we have
+ * an extra indirection via reg->instr).
+ */
+struct group_ops {
+ struct ir3_instruction *(*get)(void *arr, int idx);
+ void (*set)(void *arr, int idx, struct ir3_instruction *instr);
+};
+
+static struct ir3_instruction *arr_get(void *arr, int idx)
+{
+ return ((struct ir3_instruction **)arr)[idx];
+}
+static void arr_set_out(void *arr, int idx, struct ir3_instruction *instr)
+{
+ ((struct ir3_instruction **)arr)[idx] = instr;
+}
+static void arr_set_in(void *arr, int idx, struct ir3_instruction *instr)
+{
+ debug_printf("cannot insert mov before input!\n");
+ debug_assert(0);
+}
+static struct group_ops arr_ops_out = { arr_get, arr_set_out };
+static struct group_ops arr_ops_in = { arr_get, arr_set_in };
+
+static struct ir3_instruction *instr_get(void *arr, int idx)
+{
+ return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
+}
+static void instr_set(void *arr, int idx, struct ir3_instruction *instr)
+{
+ ((struct ir3_instruction *)arr)->regs[idx+1]->instr = instr;
+}
+static struct group_ops instr_ops = { instr_get, instr_set };
+
+
+
+static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
+{
+ return (a && b) && (a != b);
+}
+
+static struct ir3_instruction *
+create_mov(struct ir3_instruction *instr)
+{
+ struct ir3_instruction *mov;
+
+ mov = ir3_instr_create(instr->block, 1, 0);
+ mov->cat1.src_type = TYPE_F32;
+ mov->cat1.dst_type = TYPE_F32;
+ ir3_reg_create(mov, 0, 0); /* dst */
+ ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = instr;
+
+ return mov;
+}
+
+static void group_n(struct group_ops *ops, void *arr, unsigned n)
+{
+ unsigned i, j;
+
+ /* first pass, figure out what has conflicts and needs a mov
+ * inserted. Do this up front, before starting to setup
+ * left/right neighbor pointers. Trying to do it in a single
+ * pass could result in a situation where we can't even setup
+ * the mov's right neighbor ptr if the next instr also needs
+ * a mov.
+ */
+restart:
+ for (i = 0; i < n; i++) {
+ struct ir3_instruction *instr = ops->get(arr, i);
+ if (instr) {
+ struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
+ struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
+ bool conflict;
+
+ /* check for left/right neighbor conflicts: */
+ conflict = conflicts(instr->cp.left, left) ||
+ conflicts(instr->cp.right, right);
+
+ /* we also can't have an instr twice in the group: */
+ for (j = i + 1; (j < n) && !conflict; j++)
+ if (ops->get(arr, j) == instr)
+ conflict = true;
+
+ if (conflict) {
+ instr = create_mov(instr);
+ ops->set(arr, i, instr);
+ /* inserting the mov may have caused a conflict
+ * against the previous:
+ */
+ goto restart;
+ }
+ }
+ }
+
+ /* second pass, now that we've inserted mov's, fixup left/right
+ * neighbors. This is guaranteed to succeed, since by definition
+ * the newly inserted mov's cannot conflict with anything.
+ */
+ for (i = 0; i < n; i++) {
+ struct ir3_instruction *instr = ops->get(arr, i);
+ if (instr) {
+ struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
+ struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
+
+ debug_assert(!conflicts(instr->cp.left, left));
+ if (left) {
+ instr->cp.left_cnt++;
+ instr->cp.left = left;
+ }
+
+ debug_assert(!conflicts(instr->cp.right, right));
+ if (right) {
+ instr->cp.right_cnt++;
+ instr->cp.right = right;
+ }
+ }
+ }
+}
+
+static void instr_find_neighbors(struct ir3_instruction *instr)
+{
+ unsigned i;
+
+ if (check_stop(instr))
+ return;
+
+ if (is_meta(instr) && (instr->opc == OPC_META_FI))
+ group_n(&instr_ops, instr, instr->regs_count - 1);
+
+ for (i = 1; i < instr->regs_count; i++) {
+ struct ir3_instruction *src_instr = ssa(instr->regs[i]);
+ if (src_instr)
+ instr_find_neighbors(src_instr);
+ }
+}
+
+static void block_find_neighbors(struct ir3_block *block)
+{
+ unsigned i;
+
+ for (i = 0; i < block->noutputs; i++) {
+ if (block->outputs[i]) {
+ struct ir3_instruction *instr = block->outputs[i];
+ instr_find_neighbors(instr);
+ }
+ }
+
+ /* shader inputs/outputs themselves must be contiguous as well:
+ */
+ if (!block->parent) {
+ /* NOTE: group inputs first, since we only insert mov's
+ * *before* the conflicted instr (and that would go badly
+ * for inputs). By doing inputs first, we should never
+ * have a conflict on inputs.. pushing any conflict to
+ * resolve to the outputs, for stuff like:
+ *
+ * MOV OUT[n], IN[m].wzyx
+ *
+ * NOTE: we assume here inputs/outputs are grouped in vec4.
+ * This logic won't quite cut it if we don't align smaller
+ * on vec4 boundaries
+ */
+ for (i = 0; i < block->ninputs; i += 4)
+ group_n(&arr_ops_in, &block->inputs[i], 4);
+ for (i = 0; i < block->noutputs; i += 4)
+ group_n(&arr_ops_out, &block->outputs[i], 4);
+
+ }
+}
+
+void ir3_block_group(struct ir3_block *block)
+{
+ ir3_clear_mark(block->shader);
+ block_find_neighbors(block);
+}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 611b5425466..08540466bb0 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -30,7 +30,6 @@
#include "util/u_math.h"
#include "ir3.h"
-#include "ir3_visitor.h"
/*
* Register Assignment:
@@ -53,7 +52,6 @@
struct ir3_ra_ctx {
struct ir3_block *block;
enum shader_t type;
- bool half_precision;
bool frag_coord;
bool frag_face;
int cnt;
@@ -81,6 +79,15 @@ struct ir3_ra_ctx {
} \
} while (0)
+#define ra_assert(ctx, x) do { \
+ debug_assert(x); \
+ if (!(x)) { \
+ debug_printf("RA: failed assert: %s\n", #x); \
+ (ctx)->error = true; \
+ }; \
+ } while (0)
+
+
/* sorta ugly way to retrofit half-precision support.. rather than
* passing extra param around, just OR in a high bit. All the low
* value arithmetic (ie. +/- offset within a contiguous vec4, etc)
@@ -89,19 +96,6 @@ struct ir3_ra_ctx {
*/
#define REG_HALF 0x8000
-struct ir3_ra_assignment {
- int8_t off; /* offset of instruction dst within range */
- uint8_t num; /* number of components for the range */
-};
-
-static void ra_assign(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *assigner, int num);
-static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr);
-
-/*
- * Register Allocation:
- */
-
#define REG(n, wm, f) (struct ir3_register){ \
.flags = (f), \
.num = (n), \
@@ -117,19 +111,34 @@ static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n
return NULL;
}
-static int output_base(struct ir3_ra_ctx *ctx)
+/* figure out if an unassigned src register points back to the instr we
+ * are assigning:
+ */
+static bool instr_used_by(struct ir3_instruction *instr,
+ struct ir3_register *src)
{
- /* ugg, for fragment shader we need to have input at r0.x
- * (or at least if there is a way to configure it, I can't
- * see how because the blob driver always uses r0.x (ie.
- * all zeros)
- */
- if (ctx->type == SHADER_FRAGMENT) {
- if (ctx->half_precision)
- return ctx->frag_face ? 4 : 3;
- return ctx->frag_coord ? 8 : 4;
- }
- return 0;
+ struct ir3_instruction *src_instr = ssa(src);
+ unsigned i;
+ if (instr == src_instr)
+ return true;
+ if (src_instr && is_meta(src_instr))
+ for (i = 1; i < src_instr->regs_count; i++)
+ if (instr_used_by(instr, src_instr->regs[i]))
+ return true;
+
+ return false;
+}
+
+static bool instr_is_output(struct ir3_instruction *instr)
+{
+ struct ir3_block *block = instr->block;
+ unsigned i;
+
+ for (i = 0; i < block->noutputs; i++)
+ if (instr == block->outputs[i])
+ return true;
+
+ return false;
}
/* live means read before written */
@@ -137,100 +146,59 @@ static void compute_liveregs(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, regmask_t *liveregs)
{
struct ir3_block *block = instr->block;
+ struct ir3_instruction *n;
regmask_t written;
- unsigned i, j;
+ unsigned i;
- regmask_init(liveregs);
regmask_init(&written);
- for (instr = instr->next; instr; instr = instr->next) {
+ for (n = instr->next; n; n = n->next) {
struct ir3_register *r;
- if (is_meta(instr))
+ if (is_meta(n))
continue;
/* check first src's read: */
- for (j = 1; j < instr->regs_count; j++) {
- r = reg_check(instr, j);
+ for (i = 1; i < n->regs_count; i++) {
+ r = reg_check(n, i);
if (r)
regmask_set_if_not(liveregs, r, &written);
+
+ /* if any src points back to the instruction(s) in
+ * the block of neighbors that we are assigning then
+ * mark any written (clobbered) registers as live:
+ */
+ if (instr_used_by(instr, n->regs[i]))
+ regmask_or(liveregs, liveregs, &written);
}
+ /* meta-instructions don't actually get scheduled,
+ * so don't let it's write confuse us.. what we
+ * really care about is when the src to the meta
+ * instr was written:
+ */
+ if (is_meta(n))
+ continue;
+
/* then dst written (if assigned already): */
- if (instr->flags & IR3_INSTR_MARK) {
- r = reg_check(instr, 0);
- if (r)
+ r = reg_check(n, 0);
+ if (r) {
+ /* if an instruction *is* an output, then it is live */
+ if (!instr_is_output(n))
regmask_set(&written, r);
}
+
}
/* be sure to account for output registers too: */
for (i = 0; i < block->noutputs; i++) {
- struct ir3_register reg = REG(output_base(ctx) + i, X, 0);
- regmask_set_if_not(liveregs, &reg, &written);
- }
-}
-
-/* calculate registers that are clobbered before last use of 'assigner'.
- * This needs to be done backwards, although it could possibly be
- * combined into compute_liveregs(). (Ie. compute_liveregs() could
- * reverse the list, then do this part backwards reversing the list
- * again back to original order.) Otoh, probably I should try to
- * construct a proper interference graph instead.
- *
- * XXX this need to follow the same recursion path that is used for
- * to rename/assign registers (ie. ra_assign_src()).. this is a bit
- * ugly right now, maybe refactor into node iterator sort of things
- * that iterates nodes in the correct order?
- */
-static bool compute_clobbers(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *instr, struct ir3_instruction *assigner,
- regmask_t *liveregs)
-{
- unsigned i;
- bool live = false, was_live = false;
-
- if (instr == NULL) {
- struct ir3_block *block = ctx->block;
-
- /* if at the end, check outputs: */
- for (i = 0; i < block->noutputs; i++)
- if (block->outputs[i] == assigner)
- return true;
- return false;
- }
-
- for (i = 1; i < instr->regs_count; i++) {
- struct ir3_register *reg = instr->regs[i];
- if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) {
- if (is_meta(instr)) {
- switch (instr->opc) {
- case OPC_META_INPUT:
- // TODO
- assert(0);
- break;
- case OPC_META_FO:
- case OPC_META_FI:
- was_live |= compute_clobbers(ctx, instr->next,
- instr, liveregs);
- break;
- default:
- break;
- }
- }
- live = true;
- break;
- }
+ struct ir3_register *r;
+ if (!block->outputs[i])
+ continue;
+ r = reg_check(block->outputs[i], 0);
+ if (r)
+ regmask_set_if_not(liveregs, r, &written);
}
-
- was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs);
-
- if (was_live && (instr->regs_count > 0) &&
- (instr->flags & IR3_INSTR_MARK) &&
- !is_meta(instr))
- regmask_set(liveregs, instr->regs[0]);
-
- return live || was_live;
}
static int find_available(regmask_t *liveregs, int size, bool half)
@@ -254,141 +222,39 @@ static int find_available(regmask_t *liveregs, int size, bool half)
static int alloc_block(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, int size)
{
- if (!instr) {
- /* special case, allocating shader outputs. At this
- * point, nothing is allocated, just start the shader
- * outputs at r0.x and let compute_liveregs() take
- * care of the rest from here:
- */
- return 0;
- } else {
- struct ir3_register *dst = instr->regs[0];
- regmask_t liveregs;
-
- compute_liveregs(ctx, instr, &liveregs);
-
- // XXX XXX XXX XXX XXX XXX XXX XXX XXX
- // XXX hack.. maybe ra_calc should give us a list of
- // instrs to compute_clobbers() on?
- if (is_meta(instr) && (instr->opc == OPC_META_INPUT) &&
- (instr->regs_count == 1)) {
- unsigned i, base = instr->regs[0]->num & ~0x3;
- for (i = 0; i < 4; i++) {
- struct ir3_instruction *in = NULL;
- if ((base + i) < ctx->block->ninputs)
- in = ctx->block->inputs[base + i];
- if (in)
- compute_clobbers(ctx, in->next, in, &liveregs);
- }
- } else
- // XXX XXX XXX XXX XXX XXX XXX XXX XXX
- compute_clobbers(ctx, instr->next, instr, &liveregs);
-
- return find_available(&liveregs, size,
- !!(dst->flags & IR3_REG_HALF));
- }
-}
-
-/*
- * Constraint Calculation:
- */
-
-struct ra_calc_visitor {
- struct ir3_visitor base;
- struct ir3_ra_assignment a;
-};
-
-static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v)
-{
- return (struct ra_calc_visitor *)v;
-}
-
-/* calculate register assignment for the instruction. If the register
- * written by this instruction is required to be part of a range, to
- * handle other (input/output/sam/bary.f/etc) contiguous register range
- * constraints, that is calculated handled here.
- */
-static void ra_calc_dst(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg)
-{
- struct ra_calc_visitor *c = ra_calc_visitor(v);
- if (is_tex(instr)) {
- c->a.off = 0;
- c->a.num = 4;
- } else {
- c->a.off = 0;
- c->a.num = 1;
- }
-}
-
-static void
-ra_calc_dst_shader_input(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg)
-{
- struct ra_calc_visitor *c = ra_calc_visitor(v);
- struct ir3_block *block = instr->block;
struct ir3_register *dst = instr->regs[0];
- unsigned base = dst->num & ~0x3;
- unsigned i, num = 0;
-
- assert(!(dst->flags & IR3_REG_IA));
-
- /* check what input components we need: */
- for (i = 0; i < 4; i++) {
- unsigned idx = base + i;
- if ((idx < block->ninputs) && block->inputs[idx])
- num = i + 1;
- }
-
- c->a.off = dst->num - base;
- c->a.num = num;
-}
-
-static void ra_calc_src_fanin(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg)
-{
- struct ra_calc_visitor *c = ra_calc_visitor(v);
- unsigned srcn = ir3_instr_regno(instr, reg) - 1;
- c->a.off += srcn;
- c->a.num += srcn;
- c->a.num = MAX2(c->a.num, instr->regs_count - 1);
-}
-
-static const struct ir3_visitor_funcs calc_visitor_funcs = {
- .instr = ir3_visit_instr,
- .dst_shader_input = ra_calc_dst_shader_input,
- .dst_fanout = ra_calc_dst,
- .dst_fanin = ra_calc_dst,
- .dst = ra_calc_dst,
- .src_fanout = ir3_visit_reg,
- .src_fanin = ra_calc_src_fanin,
- .src = ir3_visit_reg,
-};
-
-static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner)
-{
- struct ra_calc_visitor v = {
- .base.funcs = &calc_visitor_funcs,
- };
+ struct ir3_instruction *n;
+ regmask_t liveregs;
+ unsigned name;
+
+ /* should only ever be called w/ head of neighbor list: */
+ debug_assert(!instr->cp.left);
+
+ regmask_init(&liveregs);
+
+ for (n = instr; n; n = n->cp.right)
+ compute_liveregs(ctx, n, &liveregs);
+
+ /* because we do assignment on fanout nodes for wrmask!=0x1, we
+ * need to handle this special case, where the fanout nodes all
+ * appear after one or more of the consumers of the src node:
+ *
+ * 0098:009: sam _, r2.x
+ * 0028:010: mul.f r3.z, r4.x, c13.x
+ * ; we start assigning here for '0098:009: sam'.. but
+ * ; would miss the usage at '0028:010: mul.f'
+ * 0101:009: _meta:fo _, _[0098:009: sam], off=2
+ */
+ if (is_meta(instr) && (instr->opc == OPC_META_FO))
+ compute_liveregs(ctx, instr->regs[1]->instr, &liveregs);
- ir3_visit_instr(&v.base, assigner);
+ name = find_available(&liveregs, size,
+ !!(dst->flags & IR3_REG_HALF));
- return v.a;
-}
+ if (dst->flags & IR3_REG_HALF)
+ name |= REG_HALF;
-/*
- * Register Assignment:
- */
-
-struct ra_assign_visitor {
- struct ir3_visitor base;
- struct ir3_ra_ctx *ctx;
- int num;
-};
-
-static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v)
-{
- return (struct ra_assign_visitor *)v;
+ return name;
}
static type_t half_type(type_t type)
@@ -459,17 +325,15 @@ static void fixup_half_instr_src(struct ir3_instruction *instr)
}
}
-static void ra_assign_reg(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg)
+static void reg_assign(struct ir3_instruction *instr,
+ unsigned r, unsigned name)
{
- struct ra_assign_visitor *a = ra_assign_visitor(v);
+ struct ir3_register *reg = instr->regs[r];
reg->flags &= ~IR3_REG_SSA;
- reg->num = a->num & ~REG_HALF;
+ reg->num = name & ~REG_HALF;
- assert(reg->num >= 0);
-
- if (a->num & REG_HALF) {
+ if (name & REG_HALF) {
reg->flags |= IR3_REG_HALF;
/* if dst reg being assigned, patch up the instr: */
if (reg == instr->regs[0])
@@ -479,192 +343,194 @@ static void ra_assign_reg(struct ir3_visitor *v,
}
}
-static void ra_assign_dst_shader_input(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg)
+static void instr_assign(struct ir3_ra_ctx *ctx,
+ struct ir3_instruction *instr, unsigned name);
+
+static void instr_assign_src(struct ir3_ra_ctx *ctx,
+ struct ir3_instruction *instr, unsigned r, unsigned name)
{
- struct ra_assign_visitor *a = ra_assign_visitor(v);
- unsigned i, base = reg->num & ~0x3;
- int off = base - reg->num;
-
- ra_assign_reg(v, instr, reg);
- reg->flags |= IR3_REG_IA;
-
- /* trigger assignment of all our companion input components: */
- for (i = 0; i < 4; i++) {
- struct ir3_instruction *in = NULL;
- if ((base + i) < instr->block->ninputs)
- in = instr->block->inputs[base + i];
- if (in && is_meta(in) && (in->opc == OPC_META_INPUT))
- ra_assign(a->ctx, in, a->num + off + i);
+ reg_assign(instr, r, name);
+
+ if (is_meta(instr)) {
+ switch (instr->opc) {
+ case OPC_META_INPUT:
+ /* shader-input does not have a src, only block input: */
+ debug_assert(instr->regs_count == 2);
+ instr_assign(ctx, instr, name);
+ return;
+ case OPC_META_FO:
+ instr_assign(ctx, instr, name + instr->fo.off);
+ return;
+ case OPC_META_FI:
+ instr_assign(ctx, instr, name - (r - 1));
+ return;
+ default:
+ break;
+ }
}
}
-static void ra_assign_dst_fanout(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg)
+static void instr_assign(struct ir3_ra_ctx *ctx,
+ struct ir3_instruction *instr, unsigned name)
{
- struct ra_assign_visitor *a = ra_assign_visitor(v);
- struct ir3_register *src = instr->regs[1];
- ra_assign_reg(v, instr, reg);
- if (src->flags & IR3_REG_SSA)
- ra_assign(a->ctx, src->instr, a->num - instr->fo.off);
-}
+ struct ir3_instruction *n;
+ struct ir3_register *reg = instr->regs[0];
-static void ra_assign_src_fanout(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg)
-{
- struct ra_assign_visitor *a = ra_assign_visitor(v);
- ra_assign_reg(v, instr, reg);
- ra_assign(a->ctx, instr, a->num + instr->fo.off);
-}
+ /* check if already assigned: */
+ if (!(reg->flags & IR3_REG_SSA)) {
+ /* ... and if so, sanity check: */
+ ra_assert(ctx, reg->num == (name & ~REG_HALF));
+ return;
+ }
+ /* rename this instruction's dst register: */
+ reg_assign(instr, 0, name);
-static void ra_assign_src_fanin(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg)
-{
- struct ra_assign_visitor *a = ra_assign_visitor(v);
- unsigned j, srcn = ir3_instr_regno(instr, reg) - 1;
- ra_assign_reg(v, instr, reg);
- ra_assign(a->ctx, instr, a->num - srcn);
- for (j = 1; j < instr->regs_count; j++) {
- struct ir3_register *reg = instr->regs[j];
- if (reg->flags & IR3_REG_SSA) /* could be renamed already */
- ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1);
+ /* and rename any subsequent use of result of this instr: */
+ for (n = instr->next; n && !ctx->error; n = n->next) {
+ unsigned i;
+
+ for (i = 1; i < n->regs_count; i++) {
+ reg = n->regs[i];
+ if ((reg->flags & IR3_REG_SSA) && (reg->instr == instr))
+ instr_assign_src(ctx, n, i, name);
+ }
}
-}
-static const struct ir3_visitor_funcs assign_visitor_funcs = {
- .instr = ir3_visit_instr,
- .dst_shader_input = ra_assign_dst_shader_input,
- .dst_fanout = ra_assign_dst_fanout,
- .dst_fanin = ra_assign_reg,
- .dst = ra_assign_reg,
- .src_fanout = ra_assign_src_fanout,
- .src_fanin = ra_assign_src_fanin,
- .src = ra_assign_reg,
-};
+ /* To simplify the neighbor logic, and to "avoid" dealing with
+ * instructions which write more than one output, we actually
+ * do register assignment for instructions that produce multiple
+ * outputs on the fanout nodes and propagate up the assignment
+ * to the actual instruction:
+ */
+ if (is_meta(instr) && (instr->opc == OPC_META_FO)) {
+ struct ir3_instruction *src = ssa(instr->regs[1]);
+ debug_assert(name >= instr->fo.off);
+ if (src)
+ instr_assign(ctx, src, name - instr->fo.off);
+ }
+}
-static void ra_assign(struct ir3_ra_ctx *ctx,
- struct ir3_instruction *assigner, int num)
+/* check neighbor list to see if it is already partially (or completely)
+ * assigned, in which case register block is already allocated and we
+ * just need to complete the assignment:
+ */
+static int check_partial_assignment(struct ir3_ra_ctx *ctx,
+ struct ir3_instruction *instr)
{
- struct ra_assign_visitor v = {
- .base.funcs = &assign_visitor_funcs,
- .ctx = ctx,
- .num = num,
- };
+ struct ir3_instruction *n;
+ int off = 0;
- /* if we've already visited this instruction, bail now: */
- if (ir3_instr_check_mark(assigner)) {
- debug_assert(assigner->regs[0]->num == (num & ~REG_HALF));
- if (assigner->regs[0]->num != (num & ~REG_HALF)) {
- /* impossible situation, should have been resolved
- * at an earlier stage by inserting extra mov's:
- */
- ctx->error = true;
+ debug_assert(!instr->cp.left);
+
+ for (n = instr; n; n = n->cp.right) {
+ struct ir3_register *dst = n->regs[0];
+ if (!(dst->flags & IR3_REG_SSA)) {
+ int name = dst->num - off;
+ debug_assert(name >= 0);
+ return name;
}
- return;
+ off++;
}
- ir3_visit_instr(&v.base, assigner);
+ return -1;
}
-/*
- *
+/* allocate register name(s) for a list of neighboring instructions;
+ * instr should point to leftmost neighbor (head of list)
*/
-
-static void ir3_instr_ra(struct ir3_ra_ctx *ctx,
+static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr)
{
+ struct ir3_instruction *n;
struct ir3_register *dst;
- unsigned num;
+ int name;
+
+ debug_assert(!instr->cp.left);
- /* skip over nop's */
if (instr->regs_count == 0)
return;
dst = instr->regs[0];
- /* if we've already visited this instruction, bail now: */
- if (instr->flags & IR3_INSTR_MARK)
+ /* for instructions w/ fanouts, do the actual register assignment
+ * on the group of fanout neighbor nodes and propagate the reg
+ * name back up to the texture instruction.
+ */
+ if (dst->wrmask != 0x1)
return;
+ name = check_partial_assignment(ctx, instr);
+
/* allocate register(s): */
- if (is_addr(instr)) {
- num = instr->regs[2]->num;
+ if (name >= 0) {
+ /* already partially assigned, just finish the job */
+ } else if (is_addr(instr)) {
+ debug_assert(!instr->cp.right);
+ name = instr->regs[2]->num;
} else if (reg_gpr(dst)) {
- struct ir3_ra_assignment a;
- a = ra_calc(instr);
- num = alloc_block(ctx, instr, a.num) + a.off;
+ int size;
+ /* number of consecutive registers to assign: */
+ size = ir3_neighbor_count(instr);
+ if (dst->wrmask != 0x1)
+ size = MAX2(size, ffs(~dst->wrmask) - 1);
+ name = alloc_block(ctx, instr, size);
} else if (dst->flags & IR3_REG_ADDR) {
+ debug_assert(!instr->cp.right);
dst->flags &= ~IR3_REG_ADDR;
- num = regid(REG_A0, 0) | REG_HALF;
+ name = regid(REG_A0, 0) | REG_HALF;
} else {
+ debug_assert(!instr->cp.right);
/* predicate register (p0).. etc */
- num = regid(REG_P0, 0);
- debug_assert(dst->num == num);
+ name = regid(REG_P0, 0);
+ debug_assert(dst->num == name);
}
- ra_assign(ctx, instr, num);
+ ra_assert(ctx, name >= 0);
+
+ for (n = instr; n && !ctx->error; n = n->cp.right) {
+ instr_assign(ctx, n, name);
+ name++;
+ }
}
static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
struct ir3_instruction *n;
- ra_dump_list("before:\n", block->head);
-
- if (!block->parent) {
- unsigned i, j;
- int base, off = output_base(ctx);
-
- base = alloc_block(ctx, NULL, block->noutputs + off);
-
- if (ctx->half_precision)
- base |= REG_HALF;
-
- for (i = 0; i < block->noutputs; i++)
- if (block->outputs[i] && !is_kill(block->outputs[i]))
- ra_assign(ctx, block->outputs[i], base + i + off);
-
- if (ctx->type == SHADER_FRAGMENT) {
- i = 0;
- if (ctx->frag_face) {
- /* if we have frag_face, it gets hr0.x */
- ra_assign(ctx, block->inputs[i], REG_HALF | 0);
- i += 4;
- }
- for (j = 0; i < block->ninputs; i++, j++)
- if (block->inputs[i])
- ra_assign(ctx, block->inputs[i], (base & ~REG_HALF) + j);
- } else {
- for (i = 0; i < block->ninputs; i++)
- if (block->inputs[i])
- ir3_instr_ra(ctx, block->inputs[i]);
+ /* frag shader inputs get pre-assigned, since we have some
+ * constraints/unknowns about setup for some of these regs:
+ */
+ if ((ctx->type == SHADER_FRAGMENT) && !block->parent) {
+ unsigned i = 0, j;
+ if (ctx->frag_face) {
+ /* if we have frag_face, it gets hr0.x */
+ instr_assign(ctx, block->inputs[i], REG_HALF | 0);
+ i += 4;
}
+ for (j = 0; i < block->ninputs; i++, j++)
+ if (block->inputs[i])
+ instr_assign(ctx, block->inputs[i], j);
}
- ra_dump_list("after:\n", block->head);
+ ra_dump_list("-------\n", block->head);
- /* then loop over instruction list and assign registers:
- */
- for (n = block->head; n; n = n->next) {
+ for (n = block->head; n && !ctx->error; n = n->next) {
ra_dump_instr("ASSIGN: ", n);
- ir3_instr_ra(ctx, n);
- if (ctx->error)
- return -1;
- ra_dump_list("-------", block->head);
+ instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
+ ra_dump_list("-------\n", block->head);
}
- return 0;
+ return ctx->error ? -1 : 0;
}
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
- bool half_precision, bool frag_coord, bool frag_face)
+ bool frag_coord, bool frag_face)
{
struct ir3_instruction *n;
struct ir3_ra_ctx ctx = {
.block = block,
.type = type,
- .half_precision = half_precision,
.frag_coord = frag_coord,
.frag_face = frag_face,
};
@@ -672,6 +538,8 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type,
/* mark dst registers w/ SSA flag so we can see which
* have been assigned so far:
+ * NOTE: we really should set SSA flag consistently on
+ * every dst register in the frontend.
*/
for (n = block->head; n; n = n->next)
if (n->regs_count > 0)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_visitor.h b/src/gallium/drivers/freedreno/ir3/ir3_visitor.h
deleted file mode 100644
index 1c60d1620ca..00000000000
--- a/src/gallium/drivers/freedreno/ir3/ir3_visitor.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
-
-/*
- * Copyright (C) 2014 Rob Clark <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Rob Clark <[email protected]>
- */
-
-#ifndef IR3_VISITOR_H_
-#define IR3_VISITOR_H_
-
-/**
- * Visitor which follows dst to src relationships between instructions,
- * first visiting the dst (writer) instruction, followed by src (reader)
- * instruction(s).
- *
- * TODO maybe we want multiple different visitors to walk the
- * graph in different ways?
- */
-
-struct ir3_visitor;
-
-typedef void (*ir3_visit_instr_func)(struct ir3_visitor *v,
- struct ir3_instruction *instr);
-
-typedef void (*ir3_visit_reg_func)(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg);
-
-struct ir3_visitor_funcs {
- ir3_visit_instr_func instr; // TODO do we need??
-
- ir3_visit_reg_func dst_shader_input;
- ir3_visit_reg_func dst_block_input;
- ir3_visit_reg_func dst_fanout;
- ir3_visit_reg_func dst_fanin;
- ir3_visit_reg_func dst;
-
- ir3_visit_reg_func src_block_input;
- ir3_visit_reg_func src_fanout;
- ir3_visit_reg_func src_fanin;
- ir3_visit_reg_func src;
-};
-
-struct ir3_visitor {
- const struct ir3_visitor_funcs *funcs;
- bool error;
-};
-
-#include "util/u_debug.h"
-
-static void visit_instr_dst(struct ir3_visitor *v,
- struct ir3_instruction *instr)
-{
- struct ir3_register *reg = instr->regs[0];
-
- if (is_meta(instr)) {
- switch (instr->opc) {
- case OPC_META_INPUT:
- if (instr->regs_count == 1)
- v->funcs->dst_shader_input(v, instr, reg);
- else
- v->funcs->dst_block_input(v, instr, reg);
- return;
- case OPC_META_FO:
- v->funcs->dst_fanout(v, instr, reg);
- return;
- case OPC_META_FI:
- v->funcs->dst_fanin(v, instr, reg);
- return;
- default:
- break;
-
- }
- }
-
- v->funcs->dst(v, instr, reg);
-}
-
-static void visit_instr_src(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg)
-{
- if (is_meta(instr)) {
- switch (instr->opc) {
- case OPC_META_INPUT:
- /* shader-input does not have a src, only block input: */
- debug_assert(instr->regs_count == 2);
- v->funcs->src_block_input(v, instr, reg);
- return;
- case OPC_META_FO:
- v->funcs->src_fanout(v, instr, reg);
- return;
- case OPC_META_FI:
- v->funcs->src_fanin(v, instr, reg);
- return;
- default:
- break;
-
- }
- }
-
- v->funcs->src(v, instr, reg);
-}
-
-static void ir3_visit_instr(struct ir3_visitor *v,
- struct ir3_instruction *instr)
-{
- struct ir3_instruction *n;
-
- /* visit instruction that assigns value: */
- if (instr->regs_count > 0)
- visit_instr_dst(v, instr);
-
- /* and of any following instructions which read that value: */
- n = instr->next;
- while (n && !v->error) {
- unsigned i;
-
- for (i = 1; i < n->regs_count; i++) {
- struct ir3_register *reg = n->regs[i];
- if ((reg->flags & IR3_REG_SSA) && (reg->instr == instr))
- visit_instr_src(v, n, reg);
- }
-
- n = n->next;
- }
-}
-
-static void ir3_visit_reg(struct ir3_visitor *v,
- struct ir3_instruction *instr, struct ir3_register *reg)
-{
- /* no-op */
-}
-
-#endif /* IR3_VISITOR_H_ */