diff options
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/instr-a3xx.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 69 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_cp.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_group.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_print.c | 19 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_ra.c | 65 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_sched.c | 42 |
8 files changed, 16 insertions, 205 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index 6aebb057619..904f88c7e28 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -202,7 +202,6 @@ typedef enum { /* meta instructions (category -1): */ /* placeholder instr to mark shader inputs: */ OPC_META_INPUT = _OPC(-1, 0), - OPC_META_PHI = _OPC(-1, 1), /* The "fan-in" and "fan-out" instructions are used for keeping * track of instructions that write to multiple dst registers * (fan-out) like texture sample instructions, or read multiple diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 781a660bff8..250d4672b6b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -93,7 +93,6 @@ struct ir3_register { */ IR3_REG_SSA = 0x4000, /* 'instr' is ptr to assigning instr */ IR3_REG_ARRAY = 0x8000, - IR3_REG_PHI_SRC= 0x10000, /* phi src, regs[0]->instr points to phi */ } flags; union { @@ -262,12 +261,6 @@ struct ir3_instruction { int off; /* component/offset */ } fo; struct { - /* used to temporarily hold reference to nir_phi_instr - * until we resolve the phi srcs - */ - void *nphi; - } phi; - struct { struct ir3_block *block; } inout; }; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 4c37461292f..9cbf9ce47f1 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -2594,64 +2594,6 @@ emit_tex_txs(struct ir3_context *ctx, nir_tex_instr *tex) } static void -emit_phi(struct ir3_context *ctx, nir_phi_instr *nphi) -{ - struct ir3_instruction *phi, **dst; - - /* NOTE: phi's should be lowered to scalar at this point */ - compile_assert(ctx, nphi->dest.ssa.num_components == 1); - - dst = get_dst(ctx, &nphi->dest, 1); - - phi = ir3_instr_create2(ctx->block, OPC_META_PHI, - 1 + exec_list_length(&nphi->srcs)); - ir3_reg_create(phi, 0, 0); /* dst */ - phi->phi.nphi = nphi; - - dst[0] = phi; - - put_dst(ctx, &nphi->dest); -} - -/* phi instructions are left partially constructed. We don't resolve - * their srcs until the end of the block, since (eg. loops) one of - * the phi's srcs might be defined after the phi due to back edges in - * the CFG. - */ -static void -resolve_phis(struct ir3_context *ctx, struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - nir_phi_instr *nphi; - - /* phi's only come at start of block: */ - if (instr->opc != OPC_META_PHI) - break; - - if (!instr->phi.nphi) - break; - - nphi = instr->phi.nphi; - instr->phi.nphi = NULL; - - foreach_list_typed(nir_phi_src, nsrc, node, &nphi->srcs) { - struct ir3_instruction *src = get_src(ctx, &nsrc->src)[0]; - - /* NOTE: src might not be in the same block as it comes from - * according to the phi.. but in the end the backend assumes - * it will be able to assign the same register to each (which - * only works if it is assigned in the src block), so insert - * an extra mov to make sure the phi src is assigned in the - * block it comes from: - */ - src = ir3_MOV(get_block(ctx, nsrc->pred), src, TYPE_U32); - - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; - } - } -} - -static void emit_jump(struct ir3_context *ctx, nir_jump_instr *jump) { switch (jump->type) { @@ -2701,12 +2643,13 @@ emit_instr(struct ir3_context *ctx, nir_instr *instr) } break; } - case nir_instr_type_phi: - emit_phi(ctx, nir_instr_as_phi(instr)); - break; case nir_instr_type_jump: emit_jump(ctx, nir_instr_as_jump(instr)); break; + case nir_instr_type_phi: + /* we have converted phi webs to regs in NIR by now */ + compile_error(ctx, "Unexpected NIR instruction type: %d\n", instr->type); + break; case nir_instr_type_call: case nir_instr_type_parallel_copy: compile_error(ctx, "Unhandled NIR instruction type: %d\n", instr->type); @@ -3180,10 +3123,6 @@ emit_instructions(struct ir3_context *ctx) /* And emit the body: */ ctx->impl = fxn; emit_function(ctx, fxn); - - list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { - resolve_phis(ctx, block); - } } /* from NIR perspective, we actually have inputs. But most of the "inputs" diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 2bf41e722b6..ca4ced73483 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -67,13 +67,7 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) /* TODO: remove this hack: */ if (src_instr->opc == OPC_META_FO) return false; - /* TODO: we currently don't handle left/right neighbors - * very well when inserting parallel-copies into phi.. - * to avoid problems don't eliminate a mov coming out - * of phi.. - */ - if (src_instr->opc == OPC_META_PHI) - return false; + return true; } return false; @@ -328,12 +322,6 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, { struct ir3_instruction *src = ssa(reg); - /* don't propagate copies into a PHI, since we don't know if the - * src block executed: - */ - if (instr->opc == OPC_META_PHI) - return; - if (is_eligible_mov(src, true)) { /* simple case, no immed/const/relativ, only mov's w/ ssa src: */ struct ir3_register *src_reg = src->regs[1]; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c index 961fae5c1e9..fecb89ff34b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_group.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -133,10 +133,6 @@ restart: conflict = conflicts(instr->cp.left, left) || conflicts(instr->cp.right, right); - /* RA can't yet deal very well w/ group'd phi's: */ - if (instr->opc == OPC_META_PHI) - conflict = true; - /* Mixing array elements and higher register classes * (ie. groups) doesn't really work out in RA. See: * diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c index c399c69ea2a..0b0a66b2091 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_print.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c @@ -50,20 +50,13 @@ static void print_instr_name(struct ir3_instruction *instr) printf("(ss)"); if (is_meta(instr)) { - switch(instr->opc) { - case OPC_META_PHI: - printf("Φ"); - break; - default: - /* shouldn't hit here.. just for debugging: */ - switch (instr->opc) { - case OPC_META_INPUT: printf("_meta:in"); break; - case OPC_META_FO: printf("_meta:fo"); break; - case OPC_META_FI: printf("_meta:fi"); break; + switch (instr->opc) { + case OPC_META_INPUT: printf("_meta:in"); break; + case OPC_META_FO: printf("_meta:fo"); break; + case OPC_META_FI: printf("_meta:fi"); break; - default: printf("_meta:%d", instr->opc); break; - } - break; + /* shouldn't hit here.. just for debugging: */ + default: printf("_meta:%d", instr->opc); break; } } else if (instr->opc == OPC_MOV) { static const char *type[] = { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 9158aa53b7d..56e9782befd 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -484,41 +484,6 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, d = instr; } - if (d->regs[0]->flags & IR3_REG_PHI_SRC) { - struct ir3_instruction *phi = d->regs[0]->instr; - struct ir3_instruction *dd; - int dsz, doff; - - dd = get_definer(ctx, phi, &dsz, &doff); - - *sz = MAX2(*sz, dsz); - *off = doff; - - if (instr_before(dd, d)) { - d = dd; - } - } - - if (d->opc == OPC_META_PHI) { - /* we have already inserted parallel-copies into - * the phi, so we don't need to chase definers - */ - struct ir3_register *src; - struct ir3_instruction *dd = d; - - /* note: don't use foreach_ssa_src as this gets called once - * while assigning regs (which clears SSA flag) - */ - foreach_src(src, d) { - if (!src->instr) - continue; - if (instr_before(src->instr, dd)) - dd = src->instr; - } - - d = dd; - } - if (d->opc == OPC_META_FO) { struct ir3_instruction *dd; int dsz, doff; @@ -713,13 +678,7 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) * to texture sample instructions; We consider these to be * defined at the earliest fanin source. * - * phi: used to merge values from different flow control paths - * to the same reg. Consider defined at earliest phi src, - * and update all the other phi src's (which may come later - * in the program) as users to extend the var's live range. - * - * Most of this, other than phi, is completely handled in the - * get_definer() helper. + * Most of this is handled in the get_definer() helper. * * In either case, we trace the instruction back to the original * definer and consider that as the def/use ip. @@ -734,8 +693,6 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) ir3_lookup_array(ctx->ir, dst->array.id); unsigned i; - debug_assert(!(dst->flags & IR3_REG_PHI_SRC)); - arr->start_ip = MIN2(arr->start_ip, instr->ip); arr->end_ip = MAX2(arr->end_ip, instr->ip); @@ -780,24 +737,6 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) ra_set_node_class(ctx->g, name, ctx->set->classes[id->cls]); } - - /* extend the live range for phi srcs, which may come - * from the bottom of the loop - */ - if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) { - struct ir3_instruction *phi = id->defn->regs[0]->instr; - foreach_ssa_src(src, phi) { - /* if src is after phi, then we need to extend - * the liverange to the end of src's block: - */ - if (src->ip > phi->ip) { - struct ir3_instruction *last = - list_last_entry(&src->block->instr_list, - struct ir3_instruction, node); - ctx->use[name] = MAX2(ctx->use[name], last->ip); - } - } - } } } @@ -1064,7 +1003,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg, num += FIRST_HIGH_REG; reg->num = num; - reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC); + reg->flags &= ~IR3_REG_SSA; if (is_half(id->defn)) reg->flags |= IR3_REG_HALF; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index cbb213d7738..72bd0b2ce07 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -516,12 +516,12 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) list_inithead(&block->instr_list); list_inithead(&ctx->depth_list); - /* first a pre-pass to schedule all meta:input/phi instructions + /* first a pre-pass to schedule all meta:input instructions * (which need to appear first so that RA knows the register is * occupied), and move remaining to depth sorted list: */ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { - if ((instr->opc == OPC_META_INPUT) || (instr->opc == OPC_META_PHI)) { + if (instr->opc == OPC_META_INPUT) { schedule(ctx, instr); } else { ir3_insert_by_depth(instr, &ctx->depth_list); @@ -633,46 +633,10 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) */ } -/* this is needed to ensure later RA stage succeeds: */ -static void -sched_insert_parallel_copies(struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - if (instr->opc == OPC_META_PHI) { - struct ir3_register *reg, *reg2; - foreach_src(reg, instr) { - struct ir3_instruction *src = reg->instr; - struct ir3_instruction *mov = NULL; - - /* after CP we could end up w/ duplicate phi srcs: */ - foreach_src(reg2, instr) { - if (reg == reg2) - break; - /* reg2 is before reg1 so already an inserted mov: */ - else if (reg2->instr->regs[1]->instr == src) { - mov = reg2->instr; - break; - } - } - - if (!mov) { - mov = ir3_MOV(src->block, src, TYPE_U32); - mov->regs[0]->flags |= IR3_REG_PHI_SRC; - mov->regs[0]->instr = instr; - } - - reg->instr = mov; - } - } - } -} - int ir3_sched(struct ir3 *ir) { struct ir3_sched_ctx ctx = {0}; - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - sched_insert_parallel_copies(block); - } + ir3_clear_mark(ir); list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { sched_block(&ctx, block); |