diff options
-rw-r--r-- | src/gallium/drivers/freedreno/Makefile.sources | 4 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.h | 76 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler.c | 72 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_cp.c | 259 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_group.c | 228 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_ra.c | 598 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_visitor.h | 154 |
8 files changed, 622 insertions, 777 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 1cae52905ef..592f4b4a3fa 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -127,10 +127,10 @@ ir3_SOURCES := \ ir3/ir3_depth.c \ ir3/ir3_dump.c \ ir3/ir3_flatten.c \ + ir3/ir3_group.c \ ir3/ir3.h \ ir3/ir3_legalize.c \ ir3/ir3_ra.c \ ir3/ir3_sched.c \ ir3/ir3_shader.c \ - ir3/ir3_shader.h \ - ir3/ir3_visitor.h + ir3/ir3_shader.h diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index bd0c0a5b693..aaa0ff6efa8 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -228,26 +228,62 @@ struct ir3_instruction { */ #define DEPTH_UNUSED ~0 unsigned depth; - - /* Used just during cp stage, which comes before depth pass. - * For fanin, where we need a sequence of consecutive registers, - * keep track of each src instructions left (ie 'n-1') and right - * (ie 'n+1') neighbor. The front-end must insert enough mov's - * to ensure that each instruction has at most one left and at - * most one right neighbor. During the copy-propagation pass, - * we only remove mov's when we can preserve this constraint. - */ - struct { - struct ir3_instruction *left, *right; - uint16_t left_cnt, right_cnt; - } cp; }; + + /* Used during CP and RA stages. For fanin and shader inputs/ + * outputs where we need a sequence of consecutive registers, + * keep track of each src instructions left (ie 'n-1') and right + * (ie 'n+1') neighbor. The front-end must insert enough mov's + * to ensure that each instruction has at most one left and at + * most one right neighbor. During the copy-propagation pass, + * we only remove mov's when we can preserve this constraint. + * And during the RA stage, we use the neighbor information to + * allocate a block of registers in one shot. 
+ * + * TODO: maybe just add something like: + * struct ir3_instruction_ref { + * struct ir3_instruction *instr; + * unsigned cnt; + * } + * + * Or can we get away without the refcnt stuff? It seems like + * it should be overkill.. the problem is if, potentially after + * already eliminating some mov's, if you have a single mov that + * needs to be grouped with its neighbors in two different + * places (ex. shader output and a fanin). + */ + struct { + struct ir3_instruction *left, *right; + uint16_t left_cnt, right_cnt; + } cp; struct ir3_instruction *next; #ifdef DEBUG uint32_t serialno; #endif }; +static inline struct ir3_instruction * +ir3_neighbor_first(struct ir3_instruction *instr) +{ + while (instr->cp.left) + instr = instr->cp.left; + return instr; +} + +static inline int ir3_neighbor_count(struct ir3_instruction *instr) +{ + int num = 1; + + debug_assert(!instr->cp.left); + + while (instr->cp.right) { + num++; + instr = instr->cp.right; + } + + return num; +} + struct ir3_heap_chunk; struct ir3 { @@ -415,6 +451,15 @@ static inline bool writes_pred(struct ir3_instruction *instr) return false; } +/* returns defining instruction for reg */ +/* TODO better name */ +static inline struct ir3_instruction *ssa(struct ir3_register *reg) +{ + if (reg->flags & IR3_REG_SSA) + return reg->instr; + return NULL; +} + static inline bool reg_gpr(struct ir3_register *r) { if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_ADDR)) @@ -443,12 +488,15 @@ void ir3_block_depth(struct ir3_block *block); /* copy-propagate: */ void ir3_block_cp(struct ir3_block *block); +/* group neighbors and insert mov's to resolve conflicts: */ +void ir3_block_group(struct ir3_block *block); + /* scheduling: */ int ir3_block_sched(struct ir3_block *block); /* register assignment: */ int ir3_block_ra(struct ir3_block *block, enum shader_t type, - bool half_precision, bool frag_coord, bool frag_face); + bool frag_coord, bool frag_face); /* legalize: */ void 
ir3_block_legalize(struct ir3_block *block, diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 081143d5d6e..6c334d200a3 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -61,8 +61,10 @@ static void dump_info(struct ir3_shader_variant *so, const char *str) if (block) { for (i = 0; i < block->ninputs; i++) { - if (!block->inputs[i]) + if (!block->inputs[i]) { + debug_printf("; in%d unused\n", i); continue; + } reg = block->inputs[i]->regs[0]; regid = reg->num; debug_printf("@in(%sr%d.%c)\tin%d\n", @@ -71,8 +73,10 @@ if (block) { } for (i = 0; i < block->noutputs; i++) { - if (!block->outputs[i]) + if (!block->outputs[i]) { + debug_printf("; out%d unused\n", i); continue; + } /* kill shows up as a virtual output.. skip it! */ if (is_kill(block->outputs[i])) continue; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index b47aa1d14d8..209621bd013 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -571,23 +571,40 @@ add_dst_reg_wrmask(struct ir3_compile_context *ctx, } else if ((dst->File == TGSI_FILE_TEMPORARY) || (dst->File == TGSI_FILE_OUTPUT) || (dst->File == TGSI_FILE_ADDRESS)) { + struct ir3_instruction *prev = NULL; unsigned i; /* if instruction writes multiple, we need to create * some place-holder collect the registers: */ for (i = 0; i < 4; i++) { - if (wrmask & (1 << i)) { - struct ir3_instruction *collect = - ir3_instr_create(ctx->block, -1, OPC_META_FO); - collect->fo.off = i; - /* unused dst reg: */ - ir3_reg_create(collect, 0, 0); - /* and src reg used to hold original instr */ - ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = instr; - if (!ctx->atomic) - ssa_dst(ctx, collect, dst, chan+i); + /* NOTE: slightly ugly that we setup neighbor ptrs + * for FO here, but handle FI in CP pass.. 
we should + * probably just always setup neighbor ptrs in the + * frontend? + */ + struct ir3_instruction *split = + ir3_instr_create(ctx->block, -1, OPC_META_FO); + split->fo.off = i; + /* unused dst reg: */ + /* NOTE: set SSA flag on dst here, because unused FO's + * which don't get scheduled will end up not in the + * instruction list when RA sets SSA flag on each dst. + * Slight hack. We really should set SSA flag on + * every dst register in the frontend. + */ + ir3_reg_create(split, 0, IR3_REG_SSA); + /* and src reg used to hold original instr */ + ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr; + if (prev) { + split->cp.left = prev; + split->cp.left_cnt++; + prev->cp.right = split; + prev->cp.right_cnt++; } + if ((wrmask & (1 << i)) && !ctx->atomic) + ssa_dst(ctx, split, dst, chan+i); + prev = split; } } @@ -3120,6 +3137,17 @@ ir3_compile_shader(struct ir3_shader_variant *so, } } + /* if we want half-precision outputs, mark the output registers + * as half: + */ + if (key.half_precision) { + for (i = 0; i < block->noutputs; i++) { + if (!block->outputs[i]) + continue; + block->outputs[i]->regs[0]->flags |= IR3_REG_HALF; + } + } + /* at this point, we want the kill's in the outputs array too, * so that they get scheduled (since they have no dst).. we've * already ensured that the array is big enough in push_block(): @@ -3145,9 +3173,26 @@ ir3_compile_shader(struct ir3_shader_variant *so, ir3_dump_instr_list(block->head); } + ir3_block_depth(block); + + /* First remove all the extra mov's (which we could skip if the + * front-end was clever enough not to insert them in the first + * place). Then figure out left/right neighbors, re-inserting + * extra mov's when needed to avoid conflicts. 
+ */ if (cp && !(fd_mesa_debug & FD_DBG_NOCP)) ir3_block_cp(block); + if (fd_mesa_debug & FD_DBG_OPTMSGS) { + printf("BEFORE GROUPING:\n"); + ir3_dump_instr_list(block->head); + } + + /* Group left/right neighbors, inserting mov's where needed to + * solve conflicts: + */ + ir3_block_group(block); + if (fd_mesa_debug & FD_DBG_OPTDUMP) compile_dump(&ctx); @@ -3169,20 +3214,19 @@ ir3_compile_shader(struct ir3_shader_variant *so, ir3_dump_instr_list(block->head); } - ret = ir3_block_ra(block, so->type, key.half_precision, - so->frag_coord, so->frag_face); + ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face); if (ret) { DBG("RA failed!"); goto out; } - ir3_block_legalize(block, &so->has_samp, &max_bary); - if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER RA:\n"); ir3_dump_instr_list(block->head); } + ir3_block_legalize(block, &so->has_samp, &max_bary); + /* fixup input/outputs: */ for (i = 0; i < so->outputs_count; i++) { so->outputs[i].regid = block->outputs[i*4]->regs[0]->num; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 2076b62acb8..c55425d68d4 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -33,69 +33,14 @@ /* * Copy Propagate: * + * We could eventually drop this, if the front-end did not insert any + * mov's.. For now keeping it as a separate pass since that is less + * painful than updating the existing frontend. It is expected that + * with an eventual new NIR based frontend that we won't need this. */ static void block_cp(struct ir3_block *block); -static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, bool keep); - -/* XXX move this somewhere useful (and rename?) 
*/ -static struct ir3_instruction *ssa(struct ir3_register *reg) -{ - if (reg->flags & IR3_REG_SSA) - return reg->instr; - return NULL; -} - -static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b) -{ - return (a && b) && (a != b); -} - -static void set_neighbors(struct ir3_instruction *instr, - struct ir3_instruction *left, struct ir3_instruction *right) -{ - debug_assert(!conflicts(instr->cp.left, left)); - if (left) { - instr->cp.left_cnt++; - instr->cp.left = left; - } - debug_assert(!conflicts(instr->cp.right, right)); - if (right) { - instr->cp.right_cnt++; - instr->cp.right = right; - } -} - -/* remove neighbor reference, clearing left/right neighbor ptrs when - * there are no more references: - */ -static void remove_neighbors(struct ir3_instruction *instr) -{ - if (instr->cp.left) { - if (--instr->cp.left_cnt == 0) - instr->cp.left = NULL; - } - if (instr->cp.right) { - if (--instr->cp.right_cnt == 0) - instr->cp.right = NULL; - } -} - -/* stop condition for iteration: */ -static bool check_stop(struct ir3_instruction *instr) -{ - if (ir3_instr_check_mark(instr)) - return true; - - /* stay within the block.. don't try to operate across - * basic block boundaries or we'll have problems when - * dealing with multiple basic blocks: - */ - if (is_meta(instr) && (instr->opc == OPC_META_INPUT)) - return true; - - return false; -} +static struct ir3_instruction * instr_cp(struct ir3_instruction *instr); static bool is_eligible_mov(struct ir3_instruction *instr) { @@ -109,23 +54,17 @@ static bool is_eligible_mov(struct ir3_instruction *instr) /* TODO: propagate abs/neg modifiers if possible */ if (src->flags & (IR3_REG_ABS | IR3_REG_NEGATE | IR3_REG_RELATIV)) return false; - if (src_instr) { - /* check that eliminating the move won't result in - * a neighbor conflict, ie. 
if an instruction feeds - * into multiple fanins it can still only have at - * most one left and one right neighbor: - */ - if (conflicts(instr->cp.left, src_instr->cp.left)) - return false; - if (conflicts(instr->cp.right, src_instr->cp.right)) - return false; - return true; - } + if (!src_instr) + return false; + /* TODO: remove this hack: */ + if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO)) + return false; + return true; } return false; } -static void walk_children(struct ir3_instruction *instr, bool keep) +static void walk_children(struct ir3_instruction *instr) { unsigned i; @@ -133,188 +72,56 @@ static void walk_children(struct ir3_instruction *instr, bool keep) for (i = 1; i < instr->regs_count; i++) { struct ir3_register *src = instr->regs[i]; if (src->flags & IR3_REG_SSA) - src->instr = instr_cp(src->instr, keep); + src->instr = instr_cp(src->instr); } } -static struct ir3_instruction * -instr_cp_fanin(struct ir3_instruction *instr) -{ - unsigned i, j; - - /* we need to handle fanin specially, to detect cases - * when we need to keep a mov - */ - - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *src = instr->regs[i]; - if (src->flags & IR3_REG_SSA) { - struct ir3_instruction *cand = - instr_cp(src->instr, false); - - /* if the candidate is a fanout, then keep - * the move. - * - * This is a bit, um, fragile, but it should - * catch the extra mov's that the front-end - * puts in for us already in these cases. - */ - if (is_meta(cand) && (cand->opc == OPC_META_FO)) - cand = instr_cp(src->instr, true); - - /* we can't have 2 registers referring to the same instruction, so - * go through and check if any already refer to the candidate - * instruction. if so, don't do the propagation. - * - * NOTE: we need to keep this, despite the neighbor - * conflict checks, to avoid A<->B<->A.. 
- */ - for (j = 1; j < instr->regs_count; j++) - if (instr->regs[j]->instr == cand) - break; - if (j == instr->regs_count) - src->instr = cand; - } - } - - walk_children(instr, false); - - return instr; -} static struct ir3_instruction * -instr_cp(struct ir3_instruction *instr, bool keep) +instr_cp(struct ir3_instruction *instr) { - /* if we've already visited this instruction, bail now: */ - if (check_stop(instr)) + /* stay within the block.. don't try to operate across + * basic block boundaries or we'll have problems when + * dealing with multiple basic blocks: + */ + if (is_meta(instr) && (instr->opc == OPC_META_INPUT)) return instr; - if (is_meta(instr) && (instr->opc == OPC_META_FI)) - return instr_cp_fanin(instr); - - if (!keep && is_eligible_mov(instr)) { + if (is_eligible_mov(instr)) { struct ir3_instruction *src_instr = ssa(instr->regs[1]); - set_neighbors(src_instr, instr->cp.left, instr->cp.right); - remove_neighbors(instr); - return instr_cp(src_instr, false); + return instr_cp(src_instr); } - walk_children(instr, false); + /* Check termination condition before walking children (rather + * than before checking eligible-mov). A mov instruction may + * appear as ssa-src for multiple other instructions, and we + * want to consider it for removal for each, rather than just + * the first one. (But regardless of how many places it shows + * up as a src, we only need to recursively walk the children + * once.) 
+ */ + if (!ir3_instr_check_mark(instr)) + walk_children(instr); return instr; } static void block_cp(struct ir3_block *block) { - unsigned i, j; + unsigned i; for (i = 0; i < block->noutputs; i++) { if (block->outputs[i]) { struct ir3_instruction *out = - instr_cp(block->outputs[i], false); - - /* To deal with things like this: - * - * 43: MOV OUT[2], TEMP[5] - * 44: MOV OUT[0], TEMP[5] - * - * we need to ensure that no two outputs point to - * the same instruction - */ - for (j = 0; j < i; j++) { - if (block->outputs[j] == out) { - out = instr_cp(block->outputs[i], true); - break; - } - } + instr_cp(block->outputs[i]); block->outputs[i] = out; } } } -/* - * Find instruction neighbors: - */ - -static void instr_find_neighbors(struct ir3_instruction *instr) -{ - unsigned i; - - if (check_stop(instr)) - return; - - if (is_meta(instr) && (instr->opc == OPC_META_FI)) { - unsigned n = instr->regs_count; - for (i = 1; i < n; i++) { - struct ir3_instruction *src_instr = ssa(instr->regs[i]); - if (src_instr) { - struct ir3_instruction *left = (i > 1) ? - ssa(instr->regs[i-1]) : NULL; - struct ir3_instruction *right = (i < (n - 1)) ? 
- ssa(instr->regs[i+1]) : NULL; - set_neighbors(src_instr, left, right); - instr_find_neighbors(src_instr); - } - } - } else { - for (i = 1; i < instr->regs_count; i++) { - struct ir3_instruction *src_instr = ssa(instr->regs[i]); - if (src_instr) - instr_find_neighbors(src_instr); - } - } -} - -static void block_find_neighbors(struct ir3_block *block) -{ - unsigned i; - - for (i = 0; i < block->noutputs; i++) { - if (block->outputs[i]) { - struct ir3_instruction *instr = block->outputs[i]; - instr_find_neighbors(instr); - } - } -} - -static void instr_clear_neighbors(struct ir3_instruction *instr) -{ - unsigned i; - - if (check_stop(instr)) - return; - - instr->cp.left_cnt = 0; - instr->cp.left = NULL; - instr->cp.right_cnt = 0; - instr->cp.right = NULL; - - for (i = 1; i < instr->regs_count; i++) { - struct ir3_instruction *src_instr = ssa(instr->regs[i]); - if (src_instr) - instr_clear_neighbors(src_instr); - } -} - -static void block_clear_neighbors(struct ir3_block *block) -{ - unsigned i; - - for (i = 0; i < block->noutputs; i++) { - if (block->outputs[i]) { - struct ir3_instruction *instr = block->outputs[i]; - instr_clear_neighbors(instr); - } - } -} - void ir3_block_cp(struct ir3_block *block) { ir3_clear_mark(block->shader); - block_clear_neighbors(block); - ir3_clear_mark(block->shader); - block_find_neighbors(block); - ir3_clear_mark(block->shader); block_cp(block); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c new file mode 100644 index 00000000000..f215c1c15d2 --- /dev/null +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -0,0 +1,228 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "freedreno_util.h" + +#include "ir3.h" + +/* + * Find/group instruction neighbors: + */ + +/* stop condition for iteration: */ +static bool check_stop(struct ir3_instruction *instr) +{ + if (ir3_instr_check_mark(instr)) + return true; + + /* stay within the block.. don't try to operate across + * basic block boundaries or we'll have problems when + * dealing with multiple basic blocks: + */ + if (is_meta(instr) && (instr->opc == OPC_META_INPUT)) + return true; + + return false; +} + +/* bleh.. we need to do the same group_n() thing for both inputs/outputs + * (where we have a simple instr[] array), and fanin nodes (where we have + * an extra indirection via reg->instr). 
+ */ +struct group_ops { + struct ir3_instruction *(*get)(void *arr, int idx); + void (*set)(void *arr, int idx, struct ir3_instruction *instr); +}; + +static struct ir3_instruction *arr_get(void *arr, int idx) +{ + return ((struct ir3_instruction **)arr)[idx]; +} +static void arr_set_out(void *arr, int idx, struct ir3_instruction *instr) +{ + ((struct ir3_instruction **)arr)[idx] = instr; +} +static void arr_set_in(void *arr, int idx, struct ir3_instruction *instr) +{ + debug_printf("cannot insert mov before input!\n"); + debug_assert(0); +} +static struct group_ops arr_ops_out = { arr_get, arr_set_out }; +static struct group_ops arr_ops_in = { arr_get, arr_set_in }; + +static struct ir3_instruction *instr_get(void *arr, int idx) +{ + return ssa(((struct ir3_instruction *)arr)->regs[idx+1]); +} +static void instr_set(void *arr, int idx, struct ir3_instruction *instr) +{ + ((struct ir3_instruction *)arr)->regs[idx+1]->instr = instr; +} +static struct group_ops instr_ops = { instr_get, instr_set }; + + + +static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b) +{ + return (a && b) && (a != b); +} + +static struct ir3_instruction * +create_mov(struct ir3_instruction *instr) +{ + struct ir3_instruction *mov; + + mov = ir3_instr_create(instr->block, 1, 0); + mov->cat1.src_type = TYPE_F32; + mov->cat1.dst_type = TYPE_F32; + ir3_reg_create(mov, 0, 0); /* dst */ + ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = instr; + + return mov; +} + +static void group_n(struct group_ops *ops, void *arr, unsigned n) +{ + unsigned i, j; + + /* first pass, figure out what has conflicts and needs a mov + * inserted. Do this up front, before starting to setup + * left/right neighbor pointers. Trying to do it in a single + * pass could result in a situation where we can't even setup + * the mov's right neighbor ptr if the next instr also needs + * a mov. 
+ */ +restart: + for (i = 0; i < n; i++) { + struct ir3_instruction *instr = ops->get(arr, i); + if (instr) { + struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; + struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; + bool conflict; + + /* check for left/right neighbor conflicts: */ + conflict = conflicts(instr->cp.left, left) || + conflicts(instr->cp.right, right); + + /* we also can't have an instr twice in the group: */ + for (j = i + 1; (j < n) && !conflict; j++) + if (ops->get(arr, j) == instr) + conflict = true; + + if (conflict) { + instr = create_mov(instr); + ops->set(arr, i, instr); + /* inserting the mov may have caused a conflict + * against the previous: + */ + goto restart; + } + } + } + + /* second pass, now that we've inserted mov's, fixup left/right + * neighbors. This is guaranteed to succeed, since by definition + * the newly inserted mov's cannot conflict with anything. + */ + for (i = 0; i < n; i++) { + struct ir3_instruction *instr = ops->get(arr, i); + if (instr) { + struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; + struct ir3_instruction *right = (i < (n-1)) ? 
ops->get(arr, i + 1) : NULL; + + debug_assert(!conflicts(instr->cp.left, left)); + if (left) { + instr->cp.left_cnt++; + instr->cp.left = left; + } + + debug_assert(!conflicts(instr->cp.right, right)); + if (right) { + instr->cp.right_cnt++; + instr->cp.right = right; + } + } + } +} + +static void instr_find_neighbors(struct ir3_instruction *instr) +{ + unsigned i; + + if (check_stop(instr)) + return; + + if (is_meta(instr) && (instr->opc == OPC_META_FI)) + group_n(&instr_ops, instr, instr->regs_count - 1); + + for (i = 1; i < instr->regs_count; i++) { + struct ir3_instruction *src_instr = ssa(instr->regs[i]); + if (src_instr) + instr_find_neighbors(src_instr); + } +} + +static void block_find_neighbors(struct ir3_block *block) +{ + unsigned i; + + for (i = 0; i < block->noutputs; i++) { + if (block->outputs[i]) { + struct ir3_instruction *instr = block->outputs[i]; + instr_find_neighbors(instr); + } + } + + /* shader inputs/outputs themselves must be contiguous as well: + */ + if (!block->parent) { + /* NOTE: group inputs first, since we only insert mov's + * *before* the conflicted instr (and that would go badly + * for inputs). By doing inputs first, we should never + * have a conflict on inputs.. pushing any conflict to + * resolve to the outputs, for stuff like: + * + * MOV OUT[n], IN[m].wzyx + * + * NOTE: we assume here inputs/outputs are grouped in vec4. 
+ * This logic won't quite cut it if we don't align smaller + * on vec4 boundaries + */ + for (i = 0; i < block->ninputs; i += 4) + group_n(&arr_ops_in, &block->inputs[i], 4); + for (i = 0; i < block->noutputs; i += 4) + group_n(&arr_ops_out, &block->outputs[i], 4); + + } +} + +void ir3_block_group(struct ir3_block *block) +{ + ir3_clear_mark(block->shader); + block_find_neighbors(block); +} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 611b5425466..08540466bb0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -30,7 +30,6 @@ #include "util/u_math.h" #include "ir3.h" -#include "ir3_visitor.h" /* * Register Assignment: @@ -53,7 +52,6 @@ struct ir3_ra_ctx { struct ir3_block *block; enum shader_t type; - bool half_precision; bool frag_coord; bool frag_face; int cnt; @@ -81,6 +79,15 @@ struct ir3_ra_ctx { } \ } while (0) +#define ra_assert(ctx, x) do { \ + debug_assert(x); \ + if (!(x)) { \ + debug_printf("RA: failed assert: %s\n", #x); \ + (ctx)->error = true; \ + }; \ + } while (0) + + /* sorta ugly way to retrofit half-precision support.. rather than * passing extra param around, just OR in a high bit. All the low * value arithmetic (ie. 
+/- offset within a contiguous vec4, etc) @@ -89,19 +96,6 @@ struct ir3_ra_ctx { */ #define REG_HALF 0x8000 -struct ir3_ra_assignment { - int8_t off; /* offset of instruction dst within range */ - uint8_t num; /* number of components for the range */ -}; - -static void ra_assign(struct ir3_ra_ctx *ctx, - struct ir3_instruction *assigner, int num); -static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr); - -/* - * Register Allocation: - */ - #define REG(n, wm, f) (struct ir3_register){ \ .flags = (f), \ .num = (n), \ @@ -117,19 +111,34 @@ static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n return NULL; } -static int output_base(struct ir3_ra_ctx *ctx) +/* figure out if an unassigned src register points back to the instr we + * are assigning: + */ +static bool instr_used_by(struct ir3_instruction *instr, + struct ir3_register *src) { - /* ugg, for fragment shader we need to have input at r0.x - * (or at least if there is a way to configure it, I can't - * see how because the blob driver always uses r0.x (ie. - * all zeros) - */ - if (ctx->type == SHADER_FRAGMENT) { - if (ctx->half_precision) - return ctx->frag_face ? 4 : 3; - return ctx->frag_coord ? 
8 : 4; - } - return 0; + struct ir3_instruction *src_instr = ssa(src); + unsigned i; + if (instr == src_instr) + return true; + if (src_instr && is_meta(src_instr)) + for (i = 1; i < src_instr->regs_count; i++) + if (instr_used_by(instr, src_instr->regs[i])) + return true; + + return false; +} + +static bool instr_is_output(struct ir3_instruction *instr) +{ + struct ir3_block *block = instr->block; + unsigned i; + + for (i = 0; i < block->noutputs; i++) + if (instr == block->outputs[i]) + return true; + + return false; } /* live means read before written */ @@ -137,100 +146,59 @@ static void compute_liveregs(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, regmask_t *liveregs) { struct ir3_block *block = instr->block; + struct ir3_instruction *n; regmask_t written; - unsigned i, j; + unsigned i; - regmask_init(liveregs); regmask_init(&written); - for (instr = instr->next; instr; instr = instr->next) { + for (n = instr->next; n; n = n->next) { struct ir3_register *r; - if (is_meta(instr)) + if (is_meta(n)) continue; /* check first src's read: */ - for (j = 1; j < instr->regs_count; j++) { - r = reg_check(instr, j); + for (i = 1; i < n->regs_count; i++) { + r = reg_check(n, i); if (r) regmask_set_if_not(liveregs, r, &written); + + /* if any src points back to the instruction(s) in + * the block of neighbors that we are assigning then + * mark any written (clobbered) registers as live: + */ + if (instr_used_by(instr, n->regs[i])) + regmask_or(liveregs, liveregs, &written); } + /* meta-instructions don't actually get scheduled, + * so don't let it's write confuse us.. 
what we + * really care about is when the src to the meta + * instr was written: + */ + if (is_meta(n)) + continue; + /* then dst written (if assigned already): */ - if (instr->flags & IR3_INSTR_MARK) { - r = reg_check(instr, 0); - if (r) + r = reg_check(n, 0); + if (r) { + /* if an instruction *is* an output, then it is live */ + if (!instr_is_output(n)) regmask_set(&written, r); } + } /* be sure to account for output registers too: */ for (i = 0; i < block->noutputs; i++) { - struct ir3_register reg = REG(output_base(ctx) + i, X, 0); - regmask_set_if_not(liveregs, &reg, &written); - } -} - -/* calculate registers that are clobbered before last use of 'assigner'. - * This needs to be done backwards, although it could possibly be - * combined into compute_liveregs(). (Ie. compute_liveregs() could - * reverse the list, then do this part backwards reversing the list - * again back to original order.) Otoh, probably I should try to - * construct a proper interference graph instead. - * - * XXX this need to follow the same recursion path that is used for - * to rename/assign registers (ie. ra_assign_src()).. this is a bit - * ugly right now, maybe refactor into node iterator sort of things - * that iterates nodes in the correct order? 
- */ -static bool compute_clobbers(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr, struct ir3_instruction *assigner, - regmask_t *liveregs) -{ - unsigned i; - bool live = false, was_live = false; - - if (instr == NULL) { - struct ir3_block *block = ctx->block; - - /* if at the end, check outputs: */ - for (i = 0; i < block->noutputs; i++) - if (block->outputs[i] == assigner) - return true; - return false; - } - - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) { - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_INPUT: - // TODO - assert(0); - break; - case OPC_META_FO: - case OPC_META_FI: - was_live |= compute_clobbers(ctx, instr->next, - instr, liveregs); - break; - default: - break; - } - } - live = true; - break; - } + struct ir3_register *r; + if (!block->outputs[i]) + continue; + r = reg_check(block->outputs[i], 0); + if (r) + regmask_set_if_not(liveregs, r, &written); } - - was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs); - - if (was_live && (instr->regs_count > 0) && - (instr->flags & IR3_INSTR_MARK) && - !is_meta(instr)) - regmask_set(liveregs, instr->regs[0]); - - return live || was_live; } static int find_available(regmask_t *liveregs, int size, bool half) @@ -254,141 +222,39 @@ static int find_available(regmask_t *liveregs, int size, bool half) static int alloc_block(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, int size) { - if (!instr) { - /* special case, allocating shader outputs. At this - * point, nothing is allocated, just start the shader - * outputs at r0.x and let compute_liveregs() take - * care of the rest from here: - */ - return 0; - } else { - struct ir3_register *dst = instr->regs[0]; - regmask_t liveregs; - - compute_liveregs(ctx, instr, &liveregs); - - // XXX XXX XXX XXX XXX XXX XXX XXX XXX - // XXX hack.. maybe ra_calc should give us a list of - // instrs to compute_clobbers() on? 
- if (is_meta(instr) && (instr->opc == OPC_META_INPUT) && - (instr->regs_count == 1)) { - unsigned i, base = instr->regs[0]->num & ~0x3; - for (i = 0; i < 4; i++) { - struct ir3_instruction *in = NULL; - if ((base + i) < ctx->block->ninputs) - in = ctx->block->inputs[base + i]; - if (in) - compute_clobbers(ctx, in->next, in, &liveregs); - } - } else - // XXX XXX XXX XXX XXX XXX XXX XXX XXX - compute_clobbers(ctx, instr->next, instr, &liveregs); - - return find_available(&liveregs, size, - !!(dst->flags & IR3_REG_HALF)); - } -} - -/* - * Constraint Calculation: - */ - -struct ra_calc_visitor { - struct ir3_visitor base; - struct ir3_ra_assignment a; -}; - -static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v) -{ - return (struct ra_calc_visitor *)v; -} - -/* calculate register assignment for the instruction. If the register - * written by this instruction is required to be part of a range, to - * handle other (input/output/sam/bary.f/etc) contiguous register range - * constraints, that is calculated handled here. 
- */ -static void ra_calc_dst(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_calc_visitor *c = ra_calc_visitor(v); - if (is_tex(instr)) { - c->a.off = 0; - c->a.num = 4; - } else { - c->a.off = 0; - c->a.num = 1; - } -} - -static void -ra_calc_dst_shader_input(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_calc_visitor *c = ra_calc_visitor(v); - struct ir3_block *block = instr->block; struct ir3_register *dst = instr->regs[0]; - unsigned base = dst->num & ~0x3; - unsigned i, num = 0; - - assert(!(dst->flags & IR3_REG_IA)); - - /* check what input components we need: */ - for (i = 0; i < 4; i++) { - unsigned idx = base + i; - if ((idx < block->ninputs) && block->inputs[idx]) - num = i + 1; - } - - c->a.off = dst->num - base; - c->a.num = num; -} - -static void ra_calc_src_fanin(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_calc_visitor *c = ra_calc_visitor(v); - unsigned srcn = ir3_instr_regno(instr, reg) - 1; - c->a.off += srcn; - c->a.num += srcn; - c->a.num = MAX2(c->a.num, instr->regs_count - 1); -} - -static const struct ir3_visitor_funcs calc_visitor_funcs = { - .instr = ir3_visit_instr, - .dst_shader_input = ra_calc_dst_shader_input, - .dst_fanout = ra_calc_dst, - .dst_fanin = ra_calc_dst, - .dst = ra_calc_dst, - .src_fanout = ir3_visit_reg, - .src_fanin = ra_calc_src_fanin, - .src = ir3_visit_reg, -}; - -static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner) -{ - struct ra_calc_visitor v = { - .base.funcs = &calc_visitor_funcs, - }; + struct ir3_instruction *n; + regmask_t liveregs; + unsigned name; + + /* should only ever be called w/ head of neighbor list: */ + debug_assert(!instr->cp.left); + + regmask_init(&liveregs); + + for (n = instr; n; n = n->cp.right) + compute_liveregs(ctx, n, &liveregs); + + /* because we do assignment on fanout nodes for wrmask!=0x1, we + * need to handle this 
special case, where the fanout nodes all + * appear after one or more of the consumers of the src node: + * + * 0098:009: sam _, r2.x + * 0028:010: mul.f r3.z, r4.x, c13.x + * ; we start assigning here for '0098:009: sam'.. but + * ; would miss the usage at '0028:010: mul.f' + * 0101:009: _meta:fo _, _[0098:009: sam], off=2 + */ + if (is_meta(instr) && (instr->opc == OPC_META_FO)) + compute_liveregs(ctx, instr->regs[1]->instr, &liveregs); - ir3_visit_instr(&v.base, assigner); + name = find_available(&liveregs, size, + !!(dst->flags & IR3_REG_HALF)); - return v.a; -} + if (dst->flags & IR3_REG_HALF) + name |= REG_HALF; -/* - * Register Assignment: - */ - -struct ra_assign_visitor { - struct ir3_visitor base; - struct ir3_ra_ctx *ctx; - int num; -}; - -static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v) -{ - return (struct ra_assign_visitor *)v; + return name; } static type_t half_type(type_t type) @@ -459,17 +325,15 @@ static void fixup_half_instr_src(struct ir3_instruction *instr) } } -static void ra_assign_reg(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) +static void reg_assign(struct ir3_instruction *instr, + unsigned r, unsigned name) { - struct ra_assign_visitor *a = ra_assign_visitor(v); + struct ir3_register *reg = instr->regs[r]; reg->flags &= ~IR3_REG_SSA; - reg->num = a->num & ~REG_HALF; + reg->num = name & ~REG_HALF; - assert(reg->num >= 0); - - if (a->num & REG_HALF) { + if (name & REG_HALF) { reg->flags |= IR3_REG_HALF; /* if dst reg being assigned, patch up the instr: */ if (reg == instr->regs[0]) @@ -479,192 +343,194 @@ static void ra_assign_reg(struct ir3_visitor *v, } } -static void ra_assign_dst_shader_input(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) +static void instr_assign(struct ir3_ra_ctx *ctx, + struct ir3_instruction *instr, unsigned name); + +static void instr_assign_src(struct ir3_ra_ctx *ctx, + struct ir3_instruction *instr, unsigned 
r, unsigned name) { - struct ra_assign_visitor *a = ra_assign_visitor(v); - unsigned i, base = reg->num & ~0x3; - int off = base - reg->num; - - ra_assign_reg(v, instr, reg); - reg->flags |= IR3_REG_IA; - - /* trigger assignment of all our companion input components: */ - for (i = 0; i < 4; i++) { - struct ir3_instruction *in = NULL; - if ((base + i) < instr->block->ninputs) - in = instr->block->inputs[base + i]; - if (in && is_meta(in) && (in->opc == OPC_META_INPUT)) - ra_assign(a->ctx, in, a->num + off + i); + reg_assign(instr, r, name); + + if (is_meta(instr)) { + switch (instr->opc) { + case OPC_META_INPUT: + /* shader-input does not have a src, only block input: */ + debug_assert(instr->regs_count == 2); + instr_assign(ctx, instr, name); + return; + case OPC_META_FO: + instr_assign(ctx, instr, name + instr->fo.off); + return; + case OPC_META_FI: + instr_assign(ctx, instr, name - (r - 1)); + return; + default: + break; + } } } -static void ra_assign_dst_fanout(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) +static void instr_assign(struct ir3_ra_ctx *ctx, + struct ir3_instruction *instr, unsigned name) { - struct ra_assign_visitor *a = ra_assign_visitor(v); - struct ir3_register *src = instr->regs[1]; - ra_assign_reg(v, instr, reg); - if (src->flags & IR3_REG_SSA) - ra_assign(a->ctx, src->instr, a->num - instr->fo.off); -} + struct ir3_instruction *n; + struct ir3_register *reg = instr->regs[0]; -static void ra_assign_src_fanout(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_assign_visitor *a = ra_assign_visitor(v); - ra_assign_reg(v, instr, reg); - ra_assign(a->ctx, instr, a->num + instr->fo.off); -} + /* check if already assigned: */ + if (!(reg->flags & IR3_REG_SSA)) { + /* ... 
and if so, sanity check: */ + ra_assert(ctx, reg->num == (name & ~REG_HALF)); + return; + } + /* rename this instructions dst register: */ + reg_assign(instr, 0, name); -static void ra_assign_src_fanin(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_assign_visitor *a = ra_assign_visitor(v); - unsigned j, srcn = ir3_instr_regno(instr, reg) - 1; - ra_assign_reg(v, instr, reg); - ra_assign(a->ctx, instr, a->num - srcn); - for (j = 1; j < instr->regs_count; j++) { - struct ir3_register *reg = instr->regs[j]; - if (reg->flags & IR3_REG_SSA) /* could be renamed already */ - ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1); + /* and rename any subsequent use of result of this instr: */ + for (n = instr->next; n && !ctx->error; n = n->next) { + unsigned i; + + for (i = 1; i < n->regs_count; i++) { + reg = n->regs[i]; + if ((reg->flags & IR3_REG_SSA) && (reg->instr == instr)) + instr_assign_src(ctx, n, i, name); + } } -} -static const struct ir3_visitor_funcs assign_visitor_funcs = { - .instr = ir3_visit_instr, - .dst_shader_input = ra_assign_dst_shader_input, - .dst_fanout = ra_assign_dst_fanout, - .dst_fanin = ra_assign_reg, - .dst = ra_assign_reg, - .src_fanout = ra_assign_src_fanout, - .src_fanin = ra_assign_src_fanin, - .src = ra_assign_reg, -}; + /* To simplify the neighbor logic, and to "avoid" dealing with + * instructions which write more than one output, we actually + * do register assignment for instructions that produce multiple + * outputs on the fanout nodes and propagate up the assignment + * to the actual instruction: + */ + if (is_meta(instr) && (instr->opc == OPC_META_FO)) { + struct ir3_instruction *src = ssa(instr->regs[1]); + debug_assert(name >= instr->fo.off); + if (src) + instr_assign(ctx, src, name - instr->fo.off); + } +} -static void ra_assign(struct ir3_ra_ctx *ctx, - struct ir3_instruction *assigner, int num) +/* check neighbor list to see if it is already partially (or completely) + * 
assigned, in which case register block is already allocated and we + * just need to complete the assignment: + */ +static int check_partial_assignment(struct ir3_ra_ctx *ctx, + struct ir3_instruction *instr) { - struct ra_assign_visitor v = { - .base.funcs = &assign_visitor_funcs, - .ctx = ctx, - .num = num, - }; + struct ir3_instruction *n; + int off = 0; - /* if we've already visited this instruction, bail now: */ - if (ir3_instr_check_mark(assigner)) { - debug_assert(assigner->regs[0]->num == (num & ~REG_HALF)); - if (assigner->regs[0]->num != (num & ~REG_HALF)) { - /* impossible situation, should have been resolved - * at an earlier stage by inserting extra mov's: - */ - ctx->error = true; + debug_assert(!instr->cp.left); + + for (n = instr; n; n = n->cp.right) { + struct ir3_register *dst = n->regs[0]; + if (!(dst->flags & IR3_REG_SSA)) { + int name = dst->num - off; + debug_assert(name >= 0); + return name; } - return; + off++; } - ir3_visit_instr(&v.base, assigner); + return -1; } -/* - * +/* allocate register name(s) for a list of neighboring instructions; + * instr should point to leftmost neighbor (head of list) */ - -static void ir3_instr_ra(struct ir3_ra_ctx *ctx, +static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr) { + struct ir3_instruction *n; struct ir3_register *dst; - unsigned num; + int name; + + debug_assert(!instr->cp.left); - /* skip over nop's */ if (instr->regs_count == 0) return; dst = instr->regs[0]; - /* if we've already visited this instruction, bail now: */ - if (instr->flags & IR3_INSTR_MARK) + /* for instructions w/ fanouts, do the actual register assignment + * on the group of fanout neighbor nodes and propagate the reg + * name back up to the texture instruction. 
+ */ + if (dst->wrmask != 0x1) return; + name = check_partial_assignment(ctx, instr); + /* allocate register(s): */ - if (is_addr(instr)) { - num = instr->regs[2]->num; + if (name >= 0) { + /* already partially assigned, just finish the job */ + } else if (is_addr(instr)) { + debug_assert(!instr->cp.right); + name = instr->regs[2]->num; } else if (reg_gpr(dst)) { - struct ir3_ra_assignment a; - a = ra_calc(instr); - num = alloc_block(ctx, instr, a.num) + a.off; + int size; + /* number of consecutive registers to assign: */ + size = ir3_neighbor_count(instr); + if (dst->wrmask != 0x1) + size = MAX2(size, ffs(~dst->wrmask) - 1); + name = alloc_block(ctx, instr, size); } else if (dst->flags & IR3_REG_ADDR) { + debug_assert(!instr->cp.right); dst->flags &= ~IR3_REG_ADDR; - num = regid(REG_A0, 0) | REG_HALF; + name = regid(REG_A0, 0) | REG_HALF; } else { + debug_assert(!instr->cp.right); /* predicate register (p0).. etc */ - num = regid(REG_P0, 0); - debug_assert(dst->num == num); + name = regid(REG_P0, 0); + debug_assert(dst->num == name); } - ra_assign(ctx, instr, num); + ra_assert(ctx, name >= 0); + + for (n = instr; n && !ctx->error; n = n->cp.right) { + instr_assign(ctx, n, name); + name++; + } } static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) { struct ir3_instruction *n; - ra_dump_list("before:\n", block->head); - - if (!block->parent) { - unsigned i, j; - int base, off = output_base(ctx); - - base = alloc_block(ctx, NULL, block->noutputs + off); - - if (ctx->half_precision) - base |= REG_HALF; - - for (i = 0; i < block->noutputs; i++) - if (block->outputs[i] && !is_kill(block->outputs[i])) - ra_assign(ctx, block->outputs[i], base + i + off); - - if (ctx->type == SHADER_FRAGMENT) { - i = 0; - if (ctx->frag_face) { - /* if we have frag_face, it gets hr0.x */ - ra_assign(ctx, block->inputs[i], REG_HALF | 0); - i += 4; - } - for (j = 0; i < block->ninputs; i++, j++) - if (block->inputs[i]) - ra_assign(ctx, block->inputs[i], (base & ~REG_HALF) + 
j); - } else { - for (i = 0; i < block->ninputs; i++) - if (block->inputs[i]) - ir3_instr_ra(ctx, block->inputs[i]); + /* frag shader inputs get pre-assigned, since we have some + * constraints/unknowns about setup for some of these regs: + */ + if ((ctx->type == SHADER_FRAGMENT) && !block->parent) { + unsigned i = 0, j; + if (ctx->frag_face) { + /* if we have frag_face, it gets hr0.x */ + instr_assign(ctx, block->inputs[i], REG_HALF | 0); + i += 4; } + for (j = 0; i < block->ninputs; i++, j++) + if (block->inputs[i]) + instr_assign(ctx, block->inputs[i], j); } - ra_dump_list("after:\n", block->head); + ra_dump_list("-------\n", block->head); - /* then loop over instruction list and assign registers: - */ - for (n = block->head; n; n = n->next) { + for (n = block->head; n && !ctx->error; n = n->next) { ra_dump_instr("ASSIGN: ", n); - ir3_instr_ra(ctx, n); - if (ctx->error) - return -1; - ra_dump_list("-------", block->head); + instr_alloc_and_assign(ctx, ir3_neighbor_first(n)); + ra_dump_list("-------\n", block->head); } - return 0; + return ctx->error ? -1 : 0; } int ir3_block_ra(struct ir3_block *block, enum shader_t type, - bool half_precision, bool frag_coord, bool frag_face) + bool frag_coord, bool frag_face) { struct ir3_instruction *n; struct ir3_ra_ctx ctx = { .block = block, .type = type, - .half_precision = half_precision, .frag_coord = frag_coord, .frag_face = frag_face, }; @@ -672,6 +538,8 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type, /* mark dst registers w/ SSA flag so we can see which * have been assigned so far: + * NOTE: we really should set SSA flag consistently on + * every dst register in the frontend. 
*/ for (n = block->head; n; n = n->next) if (n->regs_count > 0) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_visitor.h b/src/gallium/drivers/freedreno/ir3/ir3_visitor.h deleted file mode 100644 index 1c60d1620ca..00000000000 --- a/src/gallium/drivers/freedreno/ir3/ir3_visitor.h +++ /dev/null @@ -1,154 +0,0 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - -/* - * Copyright (C) 2014 Rob Clark <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <[email protected]> - */ - -#ifndef IR3_VISITOR_H_ -#define IR3_VISITOR_H_ - -/** - * Visitor which follows dst to src relationships between instructions, - * first visiting the dst (writer) instruction, followed by src (reader) - * instruction(s). - * - * TODO maybe we want multiple different visitors to walk the - * graph in different ways? 
- */ - -struct ir3_visitor; - -typedef void (*ir3_visit_instr_func)(struct ir3_visitor *v, - struct ir3_instruction *instr); - -typedef void (*ir3_visit_reg_func)(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg); - -struct ir3_visitor_funcs { - ir3_visit_instr_func instr; // TODO do we need?? - - ir3_visit_reg_func dst_shader_input; - ir3_visit_reg_func dst_block_input; - ir3_visit_reg_func dst_fanout; - ir3_visit_reg_func dst_fanin; - ir3_visit_reg_func dst; - - ir3_visit_reg_func src_block_input; - ir3_visit_reg_func src_fanout; - ir3_visit_reg_func src_fanin; - ir3_visit_reg_func src; -}; - -struct ir3_visitor { - const struct ir3_visitor_funcs *funcs; - bool error; -}; - -#include "util/u_debug.h" - -static void visit_instr_dst(struct ir3_visitor *v, - struct ir3_instruction *instr) -{ - struct ir3_register *reg = instr->regs[0]; - - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_INPUT: - if (instr->regs_count == 1) - v->funcs->dst_shader_input(v, instr, reg); - else - v->funcs->dst_block_input(v, instr, reg); - return; - case OPC_META_FO: - v->funcs->dst_fanout(v, instr, reg); - return; - case OPC_META_FI: - v->funcs->dst_fanin(v, instr, reg); - return; - default: - break; - - } - } - - v->funcs->dst(v, instr, reg); -} - -static void visit_instr_src(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_INPUT: - /* shader-input does not have a src, only block input: */ - debug_assert(instr->regs_count == 2); - v->funcs->src_block_input(v, instr, reg); - return; - case OPC_META_FO: - v->funcs->src_fanout(v, instr, reg); - return; - case OPC_META_FI: - v->funcs->src_fanin(v, instr, reg); - return; - default: - break; - - } - } - - v->funcs->src(v, instr, reg); -} - -static void ir3_visit_instr(struct ir3_visitor *v, - struct ir3_instruction *instr) -{ - struct ir3_instruction *n; - - /* visit instruction that assigns 
value: */ - if (instr->regs_count > 0) - visit_instr_dst(v, instr); - - /* and of any following instructions which read that value: */ - n = instr->next; - while (n && !v->error) { - unsigned i; - - for (i = 1; i < n->regs_count; i++) { - struct ir3_register *reg = n->regs[i]; - if ((reg->flags & IR3_REG_SSA) && (reg->instr == instr)) - visit_instr_src(v, n, reg); - } - - n = n->next; - } -} - -static void ir3_visit_reg(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - /* no-op */ -} - -#endif /* IR3_VISITOR_H_ */ |