diff options
Diffstat (limited to 'src/gallium/drivers/freedreno/ir3/ir3_ra.c')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_ra.c | 598 |
1 files changed, 233 insertions, 365 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 611b5425466..08540466bb0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -30,7 +30,6 @@ #include "util/u_math.h" #include "ir3.h" -#include "ir3_visitor.h" /* * Register Assignment: @@ -53,7 +52,6 @@ struct ir3_ra_ctx { struct ir3_block *block; enum shader_t type; - bool half_precision; bool frag_coord; bool frag_face; int cnt; @@ -81,6 +79,15 @@ struct ir3_ra_ctx { } \ } while (0) +#define ra_assert(ctx, x) do { \ + debug_assert(x); \ + if (!(x)) { \ + debug_printf("RA: failed assert: %s\n", #x); \ + (ctx)->error = true; \ + }; \ + } while (0) + + /* sorta ugly way to retrofit half-precision support.. rather than * passing extra param around, just OR in a high bit. All the low * value arithmetic (ie. +/- offset within a contiguous vec4, etc) @@ -89,19 +96,6 @@ struct ir3_ra_ctx { */ #define REG_HALF 0x8000 -struct ir3_ra_assignment { - int8_t off; /* offset of instruction dst within range */ - uint8_t num; /* number of components for the range */ -}; - -static void ra_assign(struct ir3_ra_ctx *ctx, - struct ir3_instruction *assigner, int num); -static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr); - -/* - * Register Allocation: - */ - #define REG(n, wm, f) (struct ir3_register){ \ .flags = (f), \ .num = (n), \ @@ -117,19 +111,34 @@ static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n return NULL; } -static int output_base(struct ir3_ra_ctx *ctx) +/* figure out if an unassigned src register points back to the instr we + * are assigning: + */ +static bool instr_used_by(struct ir3_instruction *instr, + struct ir3_register *src) { - /* ugg, for fragment shader we need to have input at r0.x - * (or at least if there is a way to configure it, I can't - * see how because the blob driver always uses r0.x (ie. - * all zeros) - */ - if (ctx->type == SHADER_FRAGMENT) { - if (ctx->half_precision) - return ctx->frag_face ? 4 : 3; - return ctx->frag_coord ? 8 : 4; - } - return 0; + struct ir3_instruction *src_instr = ssa(src); + unsigned i; + if (instr == src_instr) + return true; + if (src_instr && is_meta(src_instr)) + for (i = 1; i < src_instr->regs_count; i++) + if (instr_used_by(instr, src_instr->regs[i])) + return true; + + return false; +} + +static bool instr_is_output(struct ir3_instruction *instr) +{ + struct ir3_block *block = instr->block; + unsigned i; + + for (i = 0; i < block->noutputs; i++) + if (instr == block->outputs[i]) + return true; + + return false; } /* live means read before written */ @@ -137,100 +146,59 @@ static void compute_liveregs(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, regmask_t *liveregs) { struct ir3_block *block = instr->block; + struct ir3_instruction *n; regmask_t written; - unsigned i, j; + unsigned i; - regmask_init(liveregs); regmask_init(&written); - for (instr = instr->next; instr; instr = instr->next) { + for (n = instr->next; n; n = n->next) { struct ir3_register *r; - if (is_meta(instr)) + if (is_meta(n)) continue; /* check first src's read: */ - for (j = 1; j < instr->regs_count; j++) { - r = reg_check(instr, j); + for (i = 1; i < n->regs_count; i++) { + r = reg_check(n, i); if (r) regmask_set_if_not(liveregs, r, &written); + + /* if any src points back to the instruction(s) in + * the block of neighbors that we are assigning then + * mark any written (clobbered) registers as live: + */ + if (instr_used_by(instr, n->regs[i])) + regmask_or(liveregs, liveregs, &written); } + /* meta-instructions don't actually get scheduled, + * so don't let it's write confuse us.. what we + * really care about is when the src to the meta + * instr was written: + */ + if (is_meta(n)) + continue; + /* then dst written (if assigned already): */ - if (instr->flags & IR3_INSTR_MARK) { - r = reg_check(instr, 0); - if (r) + r = reg_check(n, 0); + if (r) { + /* if an instruction *is* an output, then it is live */ + if (!instr_is_output(n)) regmask_set(&written, r); } + } /* be sure to account for output registers too: */ for (i = 0; i < block->noutputs; i++) { - struct ir3_register reg = REG(output_base(ctx) + i, X, 0); - regmask_set_if_not(liveregs, ®, &written); - } -} - -/* calculate registers that are clobbered before last use of 'assigner'. - * This needs to be done backwards, although it could possibly be - * combined into compute_liveregs(). (Ie. compute_liveregs() could - * reverse the list, then do this part backwards reversing the list - * again back to original order.) Otoh, probably I should try to - * construct a proper interference graph instead. - * - * XXX this need to follow the same recursion path that is used for - * to rename/assign registers (ie. ra_assign_src()).. this is a bit - * ugly right now, maybe refactor into node iterator sort of things - * that iterates nodes in the correct order? - */ -static bool compute_clobbers(struct ir3_ra_ctx *ctx, - struct ir3_instruction *instr, struct ir3_instruction *assigner, - regmask_t *liveregs) -{ - unsigned i; - bool live = false, was_live = false; - - if (instr == NULL) { - struct ir3_block *block = ctx->block; - - /* if at the end, check outputs: */ - for (i = 0; i < block->noutputs; i++) - if (block->outputs[i] == assigner) - return true; - return false; - } - - for (i = 1; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) { - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_INPUT: - // TODO - assert(0); - break; - case OPC_META_FO: - case OPC_META_FI: - was_live |= compute_clobbers(ctx, instr->next, - instr, liveregs); - break; - default: - break; - } - } - live = true; - break; - } + struct ir3_register *r; + if (!block->outputs[i]) + continue; + r = reg_check(block->outputs[i], 0); + if (r) + regmask_set_if_not(liveregs, r, &written); } - - was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs); - - if (was_live && (instr->regs_count > 0) && - (instr->flags & IR3_INSTR_MARK) && - !is_meta(instr)) - regmask_set(liveregs, instr->regs[0]); - - return live || was_live; } static int find_available(regmask_t *liveregs, int size, bool half) @@ -254,141 +222,39 @@ static int find_available(regmask_t *liveregs, int size, bool half) static int alloc_block(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, int size) { - if (!instr) { - /* special case, allocating shader outputs. At this - * point, nothing is allocated, just start the shader - * outputs at r0.x and let compute_liveregs() take - * care of the rest from here: - */ - return 0; - } else { - struct ir3_register *dst = instr->regs[0]; - regmask_t liveregs; - - compute_liveregs(ctx, instr, &liveregs); - - // XXX XXX XXX XXX XXX XXX XXX XXX XXX - // XXX hack.. maybe ra_calc should give us a list of - // instrs to compute_clobbers() on? - if (is_meta(instr) && (instr->opc == OPC_META_INPUT) && - (instr->regs_count == 1)) { - unsigned i, base = instr->regs[0]->num & ~0x3; - for (i = 0; i < 4; i++) { - struct ir3_instruction *in = NULL; - if ((base + i) < ctx->block->ninputs) - in = ctx->block->inputs[base + i]; - if (in) - compute_clobbers(ctx, in->next, in, &liveregs); - } - } else - // XXX XXX XXX XXX XXX XXX XXX XXX XXX - compute_clobbers(ctx, instr->next, instr, &liveregs); - - return find_available(&liveregs, size, - !!(dst->flags & IR3_REG_HALF)); - } -} - -/* - * Constraint Calculation: - */ - -struct ra_calc_visitor { - struct ir3_visitor base; - struct ir3_ra_assignment a; -}; - -static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v) -{ - return (struct ra_calc_visitor *)v; -} - -/* calculate register assignment for the instruction. If the register - * written by this instruction is required to be part of a range, to - * handle other (input/output/sam/bary.f/etc) contiguous register range - * constraints, that is calculated handled here. - */ -static void ra_calc_dst(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_calc_visitor *c = ra_calc_visitor(v); - if (is_tex(instr)) { - c->a.off = 0; - c->a.num = 4; - } else { - c->a.off = 0; - c->a.num = 1; - } -} - -static void -ra_calc_dst_shader_input(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_calc_visitor *c = ra_calc_visitor(v); - struct ir3_block *block = instr->block; struct ir3_register *dst = instr->regs[0]; - unsigned base = dst->num & ~0x3; - unsigned i, num = 0; - - assert(!(dst->flags & IR3_REG_IA)); - - /* check what input components we need: */ - for (i = 0; i < 4; i++) { - unsigned idx = base + i; - if ((idx < block->ninputs) && block->inputs[idx]) - num = i + 1; - } - - c->a.off = dst->num - base; - c->a.num = num; -} - -static void ra_calc_src_fanin(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_calc_visitor *c = ra_calc_visitor(v); - unsigned srcn = ir3_instr_regno(instr, reg) - 1; - c->a.off += srcn; - c->a.num += srcn; - c->a.num = MAX2(c->a.num, instr->regs_count - 1); -} - -static const struct ir3_visitor_funcs calc_visitor_funcs = { - .instr = ir3_visit_instr, - .dst_shader_input = ra_calc_dst_shader_input, - .dst_fanout = ra_calc_dst, - .dst_fanin = ra_calc_dst, - .dst = ra_calc_dst, - .src_fanout = ir3_visit_reg, - .src_fanin = ra_calc_src_fanin, - .src = ir3_visit_reg, -}; - -static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner) -{ - struct ra_calc_visitor v = { - .base.funcs = &calc_visitor_funcs, - }; + struct ir3_instruction *n; + regmask_t liveregs; + unsigned name; + + /* should only ever be called w/ head of neighbor list: */ + debug_assert(!instr->cp.left); + + regmask_init(&liveregs); + + for (n = instr; n; n = n->cp.right) + compute_liveregs(ctx, n, &liveregs); + + /* because we do assignment on fanout nodes for wrmask!=0x1, we + * need to handle this special case, where the fanout nodes all + * appear after one or more of the consumers of the src node: + * + * 0098:009: sam _, r2.x + * 0028:010: mul.f r3.z, r4.x, c13.x + * ; we start assigning here for '0098:009: sam'.. but + * ; would miss the usage at '0028:010: mul.f' + * 0101:009: _meta:fo _, _[0098:009: sam], off=2 + */ + if (is_meta(instr) && (instr->opc == OPC_META_FO)) + compute_liveregs(ctx, instr->regs[1]->instr, &liveregs); - ir3_visit_instr(&v.base, assigner); + name = find_available(&liveregs, size, + !!(dst->flags & IR3_REG_HALF)); - return v.a; -} + if (dst->flags & IR3_REG_HALF) + name |= REG_HALF; -/* - * Register Assignment: - */ - -struct ra_assign_visitor { - struct ir3_visitor base; - struct ir3_ra_ctx *ctx; - int num; -}; - -static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v) -{ - return (struct ra_assign_visitor *)v; + return name; } static type_t half_type(type_t type) @@ -459,17 +325,15 @@ static void fixup_half_instr_src(struct ir3_instruction *instr) } } -static void ra_assign_reg(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) +static void reg_assign(struct ir3_instruction *instr, + unsigned r, unsigned name) { - struct ra_assign_visitor *a = ra_assign_visitor(v); + struct ir3_register *reg = instr->regs[r]; reg->flags &= ~IR3_REG_SSA; - reg->num = a->num & ~REG_HALF; + reg->num = name & ~REG_HALF; - assert(reg->num >= 0); - - if (a->num & REG_HALF) { + if (name & REG_HALF) { reg->flags |= IR3_REG_HALF; /* if dst reg being assigned, patch up the instr: */ if (reg == instr->regs[0]) @@ -479,192 +343,194 @@ static void ra_assign_reg(struct ir3_visitor *v, } } -static void ra_assign_dst_shader_input(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) +static void instr_assign(struct ir3_ra_ctx *ctx, + struct ir3_instruction *instr, unsigned name); + +static void instr_assign_src(struct ir3_ra_ctx *ctx, + struct ir3_instruction *instr, unsigned r, unsigned name) { - struct ra_assign_visitor *a = ra_assign_visitor(v); - unsigned i, base = reg->num & ~0x3; - int off = base - reg->num; - - ra_assign_reg(v, instr, reg); - reg->flags |= IR3_REG_IA; - - /* trigger assignment of all our companion input components: */ - for (i = 0; i < 4; i++) { - struct ir3_instruction *in = NULL; - if ((base + i) < instr->block->ninputs) - in = instr->block->inputs[base + i]; - if (in && is_meta(in) && (in->opc == OPC_META_INPUT)) - ra_assign(a->ctx, in, a->num + off + i); + reg_assign(instr, r, name); + + if (is_meta(instr)) { + switch (instr->opc) { + case OPC_META_INPUT: + /* shader-input does not have a src, only block input: */ + debug_assert(instr->regs_count == 2); + instr_assign(ctx, instr, name); + return; + case OPC_META_FO: + instr_assign(ctx, instr, name + instr->fo.off); + return; + case OPC_META_FI: + instr_assign(ctx, instr, name - (r - 1)); + return; + default: + break; + } } } -static void ra_assign_dst_fanout(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) +static void instr_assign(struct ir3_ra_ctx *ctx, + struct ir3_instruction *instr, unsigned name) { - struct ra_assign_visitor *a = ra_assign_visitor(v); - struct ir3_register *src = instr->regs[1]; - ra_assign_reg(v, instr, reg); - if (src->flags & IR3_REG_SSA) - ra_assign(a->ctx, src->instr, a->num - instr->fo.off); -} + struct ir3_instruction *n; + struct ir3_register *reg = instr->regs[0]; -static void ra_assign_src_fanout(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_assign_visitor *a = ra_assign_visitor(v); - ra_assign_reg(v, instr, reg); - ra_assign(a->ctx, instr, a->num + instr->fo.off); -} + /* check if already assigned: */ + if (!(reg->flags & IR3_REG_SSA)) { + /* ... and if so, sanity check: */ + ra_assert(ctx, reg->num == (name & ~REG_HALF)); + return; + } + /* rename this instructions dst register: */ + reg_assign(instr, 0, name); -static void ra_assign_src_fanin(struct ir3_visitor *v, - struct ir3_instruction *instr, struct ir3_register *reg) -{ - struct ra_assign_visitor *a = ra_assign_visitor(v); - unsigned j, srcn = ir3_instr_regno(instr, reg) - 1; - ra_assign_reg(v, instr, reg); - ra_assign(a->ctx, instr, a->num - srcn); - for (j = 1; j < instr->regs_count; j++) { - struct ir3_register *reg = instr->regs[j]; - if (reg->flags & IR3_REG_SSA) /* could be renamed already */ - ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1); + /* and rename any subsequent use of result of this instr: */ + for (n = instr->next; n && !ctx->error; n = n->next) { + unsigned i; + + for (i = 1; i < n->regs_count; i++) { + reg = n->regs[i]; + if ((reg->flags & IR3_REG_SSA) && (reg->instr == instr)) + instr_assign_src(ctx, n, i, name); + } } -} -static const struct ir3_visitor_funcs assign_visitor_funcs = { - .instr = ir3_visit_instr, - .dst_shader_input = ra_assign_dst_shader_input, - .dst_fanout = ra_assign_dst_fanout, - .dst_fanin = ra_assign_reg, - .dst = ra_assign_reg, - .src_fanout = ra_assign_src_fanout, - .src_fanin = ra_assign_src_fanin, - .src = ra_assign_reg, -}; + /* To simplify the neighbor logic, and to "avoid" dealing with + * instructions which write more than one output, we actually + * do register assignment for instructions that produce multiple + * outputs on the fanout nodes and propagate up the assignment + * to the actual instruction: + */ + if (is_meta(instr) && (instr->opc == OPC_META_FO)) { + struct ir3_instruction *src = ssa(instr->regs[1]); + debug_assert(name >= instr->fo.off); + if (src) + instr_assign(ctx, src, name - instr->fo.off); + } +} -static void ra_assign(struct ir3_ra_ctx *ctx, - struct ir3_instruction *assigner, int num) +/* check neighbor list to see if it is already partially (or completely) + * assigned, in which case register block is already allocated and we + * just need to complete the assignment: + */ +static int check_partial_assignment(struct ir3_ra_ctx *ctx, + struct ir3_instruction *instr) { - struct ra_assign_visitor v = { - .base.funcs = &assign_visitor_funcs, - .ctx = ctx, - .num = num, - }; + struct ir3_instruction *n; + int off = 0; - /* if we've already visited this instruction, bail now: */ - if (ir3_instr_check_mark(assigner)) { - debug_assert(assigner->regs[0]->num == (num & ~REG_HALF)); - if (assigner->regs[0]->num != (num & ~REG_HALF)) { - /* impossible situation, should have been resolved - * at an earlier stage by inserting extra mov's: - */ - ctx->error = true; + debug_assert(!instr->cp.left); + + for (n = instr; n; n = n->cp.right) { + struct ir3_register *dst = n->regs[0]; + if (!(dst->flags & IR3_REG_SSA)) { + int name = dst->num - off; + debug_assert(name >= 0); + return name; } - return; + off++; } - ir3_visit_instr(&v.base, assigner); + return -1; } -/* - * +/* allocate register name(s) for a list of neighboring instructions; + * instr should point to leftmost neighbor (head of list) */ - -static void ir3_instr_ra(struct ir3_ra_ctx *ctx, +static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr) { + struct ir3_instruction *n; struct ir3_register *dst; - unsigned num; + int name; + + debug_assert(!instr->cp.left); - /* skip over nop's */ if (instr->regs_count == 0) return; dst = instr->regs[0]; - /* if we've already visited this instruction, bail now: */ - if (instr->flags & IR3_INSTR_MARK) + /* for instructions w/ fanouts, do the actual register assignment + * on the group of fanout neighbor nodes and propagate the reg + * name back up to the texture instruction. + */ + if (dst->wrmask != 0x1) return; + name = check_partial_assignment(ctx, instr); + /* allocate register(s): */ - if (is_addr(instr)) { - num = instr->regs[2]->num; + if (name >= 0) { + /* already partially assigned, just finish the job */ + } else if (is_addr(instr)) { + debug_assert(!instr->cp.right); + name = instr->regs[2]->num; } else if (reg_gpr(dst)) { - struct ir3_ra_assignment a; - a = ra_calc(instr); - num = alloc_block(ctx, instr, a.num) + a.off; + int size; + /* number of consecutive registers to assign: */ + size = ir3_neighbor_count(instr); + if (dst->wrmask != 0x1) + size = MAX2(size, ffs(~dst->wrmask) - 1); + name = alloc_block(ctx, instr, size); } else if (dst->flags & IR3_REG_ADDR) { + debug_assert(!instr->cp.right); dst->flags &= ~IR3_REG_ADDR; - num = regid(REG_A0, 0) | REG_HALF; + name = regid(REG_A0, 0) | REG_HALF; } else { + debug_assert(!instr->cp.right); /* predicate register (p0).. etc */ - num = regid(REG_P0, 0); - debug_assert(dst->num == num); + name = regid(REG_P0, 0); + debug_assert(dst->num == name); } - ra_assign(ctx, instr, num); + ra_assert(ctx, name >= 0); + + for (n = instr; n && !ctx->error; n = n->cp.right) { + instr_assign(ctx, n, name); + name++; + } } static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) { struct ir3_instruction *n; - ra_dump_list("before:\n", block->head); - - if (!block->parent) { - unsigned i, j; - int base, off = output_base(ctx); - - base = alloc_block(ctx, NULL, block->noutputs + off); - - if (ctx->half_precision) - base |= REG_HALF; - - for (i = 0; i < block->noutputs; i++) - if (block->outputs[i] && !is_kill(block->outputs[i])) - ra_assign(ctx, block->outputs[i], base + i + off); - - if (ctx->type == SHADER_FRAGMENT) { - i = 0; - if (ctx->frag_face) { - /* if we have frag_face, it gets hr0.x */ - ra_assign(ctx, block->inputs[i], REG_HALF | 0); - i += 4; - } - for (j = 0; i < block->ninputs; i++, j++) - if (block->inputs[i]) - ra_assign(ctx, block->inputs[i], (base & ~REG_HALF) + j); - } else { - for (i = 0; i < block->ninputs; i++) - if (block->inputs[i]) - ir3_instr_ra(ctx, block->inputs[i]); + /* frag shader inputs get pre-assigned, since we have some + * constraints/unknowns about setup for some of these regs: + */ + if ((ctx->type == SHADER_FRAGMENT) && !block->parent) { + unsigned i = 0, j; + if (ctx->frag_face) { + /* if we have frag_face, it gets hr0.x */ + instr_assign(ctx, block->inputs[i], REG_HALF | 0); + i += 4; } + for (j = 0; i < block->ninputs; i++, j++) + if (block->inputs[i]) + instr_assign(ctx, block->inputs[i], j); } - ra_dump_list("after:\n", block->head); + ra_dump_list("-------\n", block->head); - /* then loop over instruction list and assign registers: - */ - for (n = block->head; n; n = n->next) { + for (n = block->head; n && !ctx->error; n = n->next) { ra_dump_instr("ASSIGN: ", n); - ir3_instr_ra(ctx, n); - if (ctx->error) - return -1; - ra_dump_list("-------", block->head); + instr_alloc_and_assign(ctx, ir3_neighbor_first(n)); + ra_dump_list("-------\n", block->head); } - return 0; + return ctx->error ? -1 : 0; } int ir3_block_ra(struct ir3_block *block, enum shader_t type, - bool half_precision, bool frag_coord, bool frag_face) + bool frag_coord, bool frag_face) { struct ir3_instruction *n; struct ir3_ra_ctx ctx = { .block = block, .type = type, - .half_precision = half_precision, .frag_coord = frag_coord, .frag_face = frag_face, }; @@ -672,6 +538,8 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type, /* mark dst registers w/ SSA flag so we can see which * have been assigned so far: + * NOTE: we really should set SSA flag consistently on + * every dst register in the frontend. */ for (n = block->head; n; n = n->next) if (n->regs_count > 0) |