diff options
author | Rob Clark <[email protected]> | 2015-04-30 11:38:43 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2015-06-21 07:53:09 -0400 |
commit | adf1659ff5f07d907eca552be3b566e408c8601e (patch) | |
tree | 4ff28e1f2e21d41d29bfd36c56828e83497ac9c8 /src/gallium/drivers/freedreno | |
parent | 67d994c6761e09205dbc9a0515c510fc9dde02c7 (diff) |
freedreno/ir3: use standard list implementation
Use the standard list_head doubly-linked list and related iterators,
helpers, etc., rather than a weird combo of instruction array and next
pointers depending on stage. Now each block has an instr_list. In
certain stages where we want to remove and re-add to the block's list,
we just use list_replace() to move the list to a new list_head.
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.c | 27 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.h | 15 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_cp.c | 19 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_depth.c | 68 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_legalize.c | 65 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_print.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_ra.c | 87 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_sched.c | 86 |
8 files changed, 161 insertions, 209 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index e015de91c33..84564a9eef7 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -81,7 +81,7 @@ void ir3_destroy(struct ir3 *shader) shader->chunk = chunk->next; free(chunk); } - free(shader->instrs); + free(shader->indirects); free(shader->baryfs); free(shader); } @@ -534,28 +534,32 @@ static int (*emit[])(struct ir3_instruction *instr, void *ptr, void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, uint32_t gpu_id) { + struct ir3_block *block = shader->block; uint32_t *ptr, *dwords; - uint32_t i; info->max_reg = -1; info->max_half_reg = -1; info->max_const = -1; info->instrs_count = 0; + info->sizedwords = 0; + + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + info->sizedwords += 2; + } /* need a integer number of instruction "groups" (sets of 16 * instructions on a4xx or sets of 4 instructions on a3xx), * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits) */ if (gpu_id >= 400) { - info->sizedwords = 2 * align(shader->instrs_count, 16); + info->sizedwords = align(info->sizedwords, 16 * 2); } else { - info->sizedwords = 2 * align(shader->instrs_count, 4); + info->sizedwords = align(info->sizedwords, 4 * 2); } ptr = dwords = calloc(4, info->sizedwords); - for (i = 0; i < shader->instrs_count; i++) { - struct ir3_instruction *instr = shader->instrs[i]; + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { int ret = emit[instr->category](instr, dwords, info); if (ret) goto fail; @@ -581,14 +585,15 @@ static struct ir3_register * reg_create(struct ir3 *shader, return reg; } -static void insert_instr(struct ir3 *shader, +static void insert_instr(struct ir3_block *block, struct ir3_instruction *instr) { + struct ir3 *shader = block->shader; #ifdef DEBUG static uint32_t serialno = 0; instr->serialno = ++serialno; #endif - 
array_insert(shader->instrs, instr); + list_addtail(&instr->node, &block->instr_list); if (is_input(instr)) array_insert(shader->baryfs, instr); @@ -625,6 +630,8 @@ struct ir3_block * ir3_block_create(struct ir3 *shader, block->shader = shader; + list_inithead(&block->instr_list); + return block; } @@ -652,7 +659,7 @@ struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, instr->block = block; instr->category = category; instr->opc = opc; - insert_instr(block->shader, instr); + insert_instr(block, instr); return instr; } @@ -677,7 +684,7 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) *new_instr = *instr; new_instr->regs = regs; - insert_instr(instr->block->shader, new_instr); + insert_instr(instr->block, new_instr); /* clone registers: */ new_instr->regs_count = 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index f37dfab3341..edb5b49e23c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -28,6 +28,7 @@ #include <stdbool.h> #include "util/u_debug.h" +#include "util/list.h" #include "instr-a3xx.h" #include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */ @@ -290,7 +291,9 @@ struct ir3_instruction { */ struct ir3_instruction *fanin; - struct ir3_instruction *next; + /* Entry in ir3_block's instruction list: */ + struct list_head node; + #ifdef DEBUG uint32_t serialno; #endif @@ -321,8 +324,6 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr) struct ir3_heap_chunk; struct ir3 { - unsigned instrs_count, instrs_sz; - struct ir3_instruction **instrs; /* Track bary.f (and ldlv) instructions.. 
this is needed in * scheduling to ensure that all varying fetches happen before @@ -361,7 +362,7 @@ struct ir3_block { /* only a single address register: */ struct ir3_instruction *address; struct ir3_block *parent; - struct ir3_instruction *head; + struct list_head instr_list; }; struct ir3 * ir3_create(void); @@ -402,11 +403,8 @@ static inline void ir3_clear_mark(struct ir3 *shader) * a block, so tracking the list of instrs globally is * unlikely to be what we want. */ - unsigned i; - for (i = 0; i < shader->instrs_count; i++) { - struct ir3_instruction *instr = shader->instrs[i]; + list_for_each_entry (struct ir3_instruction, instr, &shader->block->instr_list, node) instr->flags &= ~IR3_INSTR_MARK; - } } static inline int ir3_instr_regno(struct ir3_instruction *instr, @@ -756,6 +754,7 @@ int ir3_block_flatten(struct ir3_block *block); /* depth calculation: */ int ir3_delayslots(struct ir3_instruction *assigner, struct ir3_instruction *consumer, unsigned n); +void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list); void ir3_block_depth(struct ir3_block *block); /* copy-propagate: */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index fa7d363be7b..350f7dd5e6b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -354,13 +354,6 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags) { struct ir3_register *reg; - /* stay within the block.. 
don't try to operate across - * basic block boundaries or we'll have problems when - * dealing with multiple basic blocks: - */ - if (is_meta(instr) && (instr->opc == OPC_META_INPUT)) - return instr; - if (is_eligible_mov(instr, !!flags)) { struct ir3_register *reg = instr->regs[1]; struct ir3_instruction *src_instr = ssa(reg); @@ -394,11 +387,11 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags) return instr; } -static void block_cp(struct ir3_block *block) +void ir3_block_cp(struct ir3_block *block) { - unsigned i; + ir3_clear_mark(block->shader); - for (i = 0; i < block->noutputs; i++) { + for (unsigned i = 0; i < block->noutputs; i++) { if (block->outputs[i]) { struct ir3_instruction *out = instr_cp(block->outputs[i], NULL); @@ -407,9 +400,3 @@ static void block_cp(struct ir3_block *block) } } } - -void ir3_block_cp(struct ir3_block *block) -{ - ir3_clear_mark(block->shader); - block_cp(block); -} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index b899c66b37e..601e14a1c85 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -84,25 +84,25 @@ int ir3_delayslots(struct ir3_instruction *assigner, } } -static void insert_by_depth(struct ir3_instruction *instr) +void +ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list) { - struct ir3_block *block = instr->block; - struct ir3_instruction *n = block->head; - struct ir3_instruction *p = NULL; - - while (n && (n != instr) && (n->depth > instr->depth)) { - p = n; - n = n->next; + /* remove from existing spot in list: */ + list_delinit(&instr->node); + + /* find where to re-insert instruction: */ + list_for_each_entry (struct ir3_instruction, pos, list, node) { + if (pos->depth > instr->depth) { + list_add(&instr->node, &pos->node); + return; + } } - - instr->next = n; - if (p) - p->next = instr; - else - block->head = instr; + /* if we get here, we didn't find an 
insertion spot: */ + list_addtail(&instr->node, list); } -static void ir3_instr_depth(struct ir3_instruction *instr) +static void +ir3_instr_depth(struct ir3_instruction *instr) { struct ir3_instruction *src; @@ -123,42 +123,38 @@ static void ir3_instr_depth(struct ir3_instruction *instr) instr->depth = MAX2(instr->depth, sd); } - /* meta-instructions don't add cycles, other than PHI.. which - * might translate to a real instruction.. - * - * well, not entirely true, fan-in/out, etc might need to need - * to generate some extra mov's in edge cases, etc.. probably - * we might want to do depth calculation considering the worst - * case for these?? - */ if (!is_meta(instr)) instr->depth++; - insert_by_depth(instr); + ir3_insert_by_depth(instr, &instr->block->instr_list); +} + +static void +remove_unused_by_block(struct ir3_block *block) +{ + list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { + if (!ir3_instr_check_mark(instr)) { + /* mark it, in case it is input, so we can + * remove unused inputs: + */ + instr->depth = DEPTH_UNUSED; + /* and remove from instruction list: */ + list_delinit(&instr->node); + } + } } void ir3_block_depth(struct ir3_block *block) { unsigned i; - block->head = NULL; - ir3_clear_mark(block->shader); for (i = 0; i < block->noutputs; i++) if (block->outputs[i]) ir3_instr_depth(block->outputs[i]); /* mark un-used instructions: */ - for (i = 0; i < block->shader->instrs_count; i++) { - struct ir3_instruction *instr = block->shader->instrs[i]; - - /* just consider instructions within this block: */ - if (instr->block != block) - continue; - - if (!ir3_instr_check_mark(instr)) - instr->depth = DEPTH_UNUSED; - } + remove_unused_by_block(block); /* cleanup unused inputs: */ for (i = 0; i < block->ninputs; i++) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index 61713c25e72..be0b5ce442c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c 
+++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -51,12 +51,9 @@ struct ir3_legalize_ctx { static void legalize(struct ir3_legalize_ctx *ctx) { struct ir3_block *block = ctx->block; - struct ir3_instruction *n; - struct ir3 *shader = block->shader; - struct ir3_instruction *end = - ir3_instr_create(block, 0, OPC_END); struct ir3_instruction *last_input = NULL; struct ir3_instruction *last_rel = NULL; + struct list_head instr_list; regmask_t needs_ss_war; /* write after read */ regmask_t needs_ss; regmask_t needs_sy; @@ -65,9 +62,13 @@ static void legalize(struct ir3_legalize_ctx *ctx) regmask_init(&needs_ss); regmask_init(&needs_sy); - shader->instrs_count = 0; + /* remove all the instructions from the list, we'll be adding + * them back in as we go + */ + list_replace(&block->instr_list, &instr_list); + list_inithead(&block->instr_list); - for (n = block->head; n; n = n->next) { + list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) { struct ir3_register *reg; unsigned i; @@ -140,12 +141,12 @@ static void legalize(struct ir3_legalize_ctx *ctx) } /* need to be able to set (ss) on first instruction: */ - if ((shader->instrs_count == 0) && (n->category >= 5)) + if (list_empty(&block->instr_list) && (n->category >= 5)) ir3_NOP(block); - if (is_nop(n) && shader->instrs_count) { - struct ir3_instruction *last = - shader->instrs[shader->instrs_count-1]; + if (is_nop(n) && !list_empty(&block->instr_list)) { + struct ir3_instruction *last = list_last_entry(&block->instr_list, + struct ir3_instruction, node); if (is_nop(last) && (last->repeat < 5)) { last->repeat++; last->flags |= n->flags; @@ -153,7 +154,7 @@ static void legalize(struct ir3_legalize_ctx *ctx) } } - shader->instrs[shader->instrs_count++] = n; + list_addtail(&n->node, &block->instr_list); if (is_sfu(n)) regmask_set(&needs_ss, n->regs[0]); @@ -192,35 +193,19 @@ static void legalize(struct ir3_legalize_ctx *ctx) * the (ei) flag: */ if (is_mem(last_input) && (last_input->opc == 
OPC_LDLV)) { - int i, cnt; + struct ir3_instruction *baryf; - /* note that ir3_instr_create() inserts into - * shader->instrs[] and increments the count.. - * so we need to bump up the cnt initially (to - * avoid it clobbering the last real instr) and - * restore it after. - */ - cnt = ++shader->instrs_count; - - /* inserting instructions would be a bit nicer if list.. */ - for (i = cnt - 2; i >= 0; i--) { - if (shader->instrs[i] == last_input) { - - /* (ss)bary.f (ei)r63.x, 0, r0.x */ - last_input = ir3_instr_create(block, 2, OPC_BARY_F); - last_input->flags |= IR3_INSTR_SS; - ir3_reg_create(last_input, regid(63, 0), 0); - ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0; - ir3_reg_create(last_input, regid(0, 0), 0); + /* (ss)bary.f (ei)r63.x, 0, r0.x */ + baryf = ir3_instr_create(block, 2, OPC_BARY_F); + baryf->flags |= IR3_INSTR_SS; + ir3_reg_create(baryf, regid(63, 0), 0); + ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; + ir3_reg_create(baryf, regid(0, 0), 0); - shader->instrs[i + 1] = last_input; - - break; - } - shader->instrs[i + 1] = shader->instrs[i]; - } + /* insert the dummy bary.f after last_input: */ + list_add(&baryf->node, &last_input->node); - shader->instrs_count = cnt; + last_input = baryf; } last_input->regs[0]->flags |= IR3_REG_EI; } @@ -228,9 +213,11 @@ static void legalize(struct ir3_legalize_ctx *ctx) if (last_rel) last_rel->flags |= IR3_INSTR_UL; - shader->instrs[shader->instrs_count++] = end; + /* create/add 'end' instruction: */ + ir3_instr_create(block, 0, OPC_END); - shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; + list_first_entry(&block->instr_list, struct ir3_instruction, node) + ->flags |= IR3_INSTR_SS | IR3_INSTR_SY; } void ir3_block_legalize(struct ir3_block *block, diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c index a5c5d3c8efa..755c0c23c36 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_print.c +++ 
b/src/gallium/drivers/freedreno/ir3/ir3_print.c @@ -186,9 +186,8 @@ void ir3_print_instr(struct ir3_instruction *instr) static void print_block(struct ir3_block *block, int lvl) { - struct ir3_instruction *instr; tab(lvl); printf("block {\n"); - for (instr = block->head; instr; instr = instr->next) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { print_instr(instr, lvl+1); } tab(lvl); printf("}\n"); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 501352515b5..95f6a81861e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -75,10 +75,10 @@ struct ir3_ra_ctx { # define ra_debug 0 #endif -#define ra_dump_list(msg, n) do { \ +#define ra_dump_list(msg, ir) do { \ if (ra_debug) { \ debug_printf("-- " msg); \ - ir3_print(n->block->shader); \ + ir3_print(ir); \ } \ } while (0) @@ -175,14 +175,13 @@ static void mark_sources(struct ir3_instruction *instr, static void compute_liveregs(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, regmask_t *liveregs) { - struct ir3_block *block = instr->block; - struct ir3_instruction *n; + struct ir3_block *block = ctx->block; regmask_t written; unsigned i; regmask_init(&written); - for (n = instr->next; n; n = n->next) { + list_for_each_entry (struct ir3_instruction, n, &instr->node, node) { struct ir3_register *r; if (is_meta(n)) @@ -411,9 +410,8 @@ static void instr_assign_src(struct ir3_ra_ctx *ctx, static void instr_assign_srcs(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned name) { - struct ir3_instruction *n, *src; - - for (n = instr->next; n && !ctx->error; n = n->next) { + list_for_each_entry (struct ir3_instruction, n, &instr->node, node) { + struct ir3_instruction *src; foreach_ssa_src_n(src, i, n) { unsigned r = i + 1; @@ -424,6 +422,8 @@ static void instr_assign_srcs(struct ir3_ra_ctx *ctx, if (src == instr) instr_assign_src(ctx, n, r, name); } + if 
(ctx->error) + break; } } @@ -589,14 +589,45 @@ static void instr_assign_array(struct ir3_ra_ctx *ctx, } -static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) +static bool +block_ra(struct ir3_block *block, void *state) { - struct ir3_instruction *n; + struct ir3_ra_ctx *ctx = state; + + ra_dump_list("-------\n", block->shader); + + /* first pass, assign arrays: */ + list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) { + if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) { + debug_assert(!n->cp.left); /* don't think this should happen */ + ra_dump_instr("ASSIGN ARRAY: ", n); + instr_assign_array(ctx, n); + ra_dump_list("-------\n", block->shader); + } + + if (ctx->error) + return false; + } + + list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) { + ra_dump_instr("ASSIGN: ", n); + instr_alloc_and_assign(ctx, ir3_neighbor_first(n)); + ra_dump_list("-------\n", block->shader); + + if (ctx->error) + return false; + } + + return true; +} +static int +shader_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ /* frag shader inputs get pre-assigned, since we have some * constraints/unknowns about setup for some of these regs: */ - if ((ctx->type == SHADER_FRAGMENT) && !block->parent) { + if (ctx->type == SHADER_FRAGMENT) { unsigned i = 0, j; if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) { /* if we have frag_face, it gets hr0.x */ @@ -608,31 +639,23 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) instr_assign(ctx, block->inputs[i], j); } - ra_dump_list("-------\n", block->head); - - /* first pass, assign arrays: */ - for (n = block->head; n && !ctx->error; n = n->next) { - if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) { - debug_assert(!n->cp.left); /* don't think this should happen */ - ra_dump_instr("ASSIGN ARRAY: ", n); - instr_assign_array(ctx, n); - ra_dump_list("-------\n", block->head); - } - } - - for (n = block->head; n && !ctx->error; n = 
n->next) { - ra_dump_instr("ASSIGN: ", n); - instr_alloc_and_assign(ctx, ir3_neighbor_first(n)); - ra_dump_list("-------\n", block->head); - } + block_ra(block, ctx); return ctx->error ? -1 : 0; } +static bool +block_mark_dst(struct ir3_block *block, void *state) +{ + list_for_each_entry (struct ir3_instruction, n, &block->instr_list, node) + if (n->regs_count > 0) + n->regs[0]->flags |= IR3_REG_SSA; + return true; +} + int ir3_block_ra(struct ir3_block *block, enum shader_t type, bool frag_coord, bool frag_face) { - struct ir3_instruction *n; struct ir3_ra_ctx ctx = { .block = block, .type = type, @@ -648,12 +671,10 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type, * NOTE: we really should set SSA flag consistently on * every dst register in the frontend. */ - for (n = block->head; n; n = n->next) - if (n->regs_count > 0) - n->regs[0]->flags |= IR3_REG_SSA; + block_mark_dst(block, &ctx); ir3_clear_mark(block->shader); - ret = block_ra(&ctx, block); + ret = shader_ra(&ctx, block); return ret; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index 5ca6d7b62d5..fc41f93b884 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -88,26 +88,21 @@ deepest(struct ir3_instruction **srcs, unsigned nsrcs) return d; } -static unsigned distance(struct ir3_sched_ctx *ctx, - struct ir3_instruction *instr, unsigned maxd) +static unsigned +distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr, + unsigned maxd) { - struct ir3_instruction *n = ctx->scheduled; + struct list_head *instr_list = &instr->block->instr_list; unsigned d = 0; - while (n && (n != instr) && (d < maxd)) { + + list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) { + if ((n == instr) || (d >= maxd)) + break; if (is_alu(n) || is_flow(n)) d++; - n = n->next; } - return d; -} -/* TODO maybe we want double linked list? 
*/ -static struct ir3_instruction * prev(struct ir3_instruction *instr) -{ - struct ir3_instruction *p = instr->block->head; - while (p && (p->next != instr)) - p = p->next; - return p; + return d; } static bool is_sfu_or_mem(struct ir3_instruction *instr) @@ -125,25 +120,11 @@ static void schedule(struct ir3_sched_ctx *ctx, * scheduling and depth calculation.. */ if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr)) - schedule(ctx, ir3_NOP(block), false); + ir3_NOP(block); /* remove from depth list: */ - if (remove) { - struct ir3_instruction *p = prev(instr); - - /* NOTE: this can happen for inputs which are not - * read.. in that case there is no need to schedule - * the input, so just bail: - */ - if (instr != (p ? p->next : block->head)) - return; - - if (p) - p->next = instr->next; - else - block->head = instr->next; - } + list_delinit(&instr->node); if (writes_addr(instr)) { assert(ctx->addr == NULL); @@ -157,7 +138,7 @@ static void schedule(struct ir3_sched_ctx *ctx, instr->flags |= IR3_INSTR_MARK; - instr->next = ctx->scheduled; + list_addtail(&instr->node, &instr->block->instr_list); ctx->scheduled = instr; ctx->cnt++; @@ -284,18 +265,6 @@ static int trysched(struct ir3_sched_ctx *ctx, return SCHEDULED; } -static struct ir3_instruction * reverse(struct ir3_instruction *instr) -{ - struct ir3_instruction *reversed = NULL; - while (instr) { - struct ir3_instruction *next = instr->next; - instr->next = reversed; - reversed = instr; - instr = next; - } - return reversed; -} - static bool uses_current_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) { @@ -317,16 +286,14 @@ static bool uses_current_pred(struct ir3_sched_ctx *ctx, * other instructions using the current address register: */ static int block_sched_undelayed(struct ir3_sched_ctx *ctx, - struct ir3_block *block) + struct list_head *unscheduled_list) { - struct ir3_instruction *instr = block->head; bool addr_in_use = false; bool pred_in_use = false; bool 
all_delayed = true; unsigned cnt = ~0, attempted = 0; - while (instr) { - struct ir3_instruction *next = instr->next; + list_for_each_entry_safe(struct ir3_instruction, instr, unscheduled_list, node) { bool addr = uses_current_addr(ctx, instr); bool pred = uses_current_pred(ctx, instr); @@ -347,8 +314,6 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx, attempted++; } - - instr = next; } if (!addr_in_use) @@ -408,7 +373,10 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx, static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block) { - struct ir3_instruction *instr; + struct list_head unscheduled_list; + + list_replace(&block->instr_list, &unscheduled_list); + list_inithead(&block->instr_list); /* schedule all the shader input's (meta-instr) first so that * the RA step sees that the input registers contain a value @@ -423,31 +391,22 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block) } } - while ((instr = block->head) && !ctx->error) { - /* NOTE: always grab next *before* trysched(), in case the - * instruction is actually scheduled (and therefore moved - * from depth list into scheduled list) - */ - struct ir3_instruction *next = instr->next; + list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { int cnt = trysched(ctx, instr); if (cnt == DELAYED) - cnt = block_sched_undelayed(ctx, block); + cnt = block_sched_undelayed(ctx, &unscheduled_list); /* -1 is signal to return up stack, but to us means same as 0: */ cnt = MAX2(0, cnt); cnt += ctx->cnt; - instr = next; /* if deepest remaining instruction cannot be scheduled, try * the increasingly more shallow instructions until needed * number of delay slots is filled: */ - while (instr && (cnt > ctx->cnt)) { - next = instr->next; + list_for_each_entry_safe (struct ir3_instruction, instr, &instr->node, node) trysched(ctx, instr); - instr = next; - } /* and if we run out of instructions that can be scheduled, * then it is time 
for nop's: @@ -455,9 +414,6 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block) while (cnt > ctx->cnt) schedule(ctx, ir3_NOP(block), false); } - - /* at this point, scheduled list is in reverse order, so fix that: */ - block->head = reverse(ctx->scheduled); } int ir3_block_sched(struct ir3_block *block) |