diff options
-rw-r--r-- | src/freedreno/ir3/ir3.h | 4
-rw-r--r-- | src/freedreno/ir3/ir3_delay.c | 8
-rw-r--r-- | src/freedreno/ir3/ir3_legalize.c | 40
-rw-r--r-- | src/freedreno/ir3/ir3_sched.c | 42
4 files changed, 45 insertions, 49 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index ac294934133..bf0f92a478a 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1116,8 +1116,6 @@ void ir3_print_instr(struct ir3_instruction *instr); /* delay calculation: */ int ir3_delayslots(struct ir3_instruction *assigner, struct ir3_instruction *consumer, unsigned n); -unsigned ir3_distance(struct ir3_block *block, struct ir3_instruction *instr, - unsigned maxd, bool pred); unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr, bool soft, bool pred); void ir3_remove_nops(struct ir3 *ir); @@ -1359,7 +1357,7 @@ ir3_##name(struct ir3_block *block, \ #define INSTR4(name) __INSTR4(0, name, OPC_##name) /* cat0 instructions: */ -INSTR0(BR) +INSTR1(BR) INSTR0(JUMP) INSTR1(KILL) INSTR0(END) diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index 207c8cb91cc..0b796a4183a 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -126,8 +126,8 @@ count_instruction(struct ir3_instruction *n) * find the worst case (shortest) distance (only possible after * individual blocks are all scheduled) */ -unsigned -ir3_distance(struct ir3_block *block, struct ir3_instruction *instr, +static unsigned +distance(struct ir3_block *block, struct ir3_instruction *instr, unsigned maxd, bool pred) { unsigned d = 0; @@ -162,7 +162,7 @@ ir3_distance(struct ir3_block *block, struct ir3_instruction *instr, struct ir3_block *pred = (struct ir3_block *)entry->key; unsigned n; - n = ir3_distance(pred, instr, min, pred); + n = distance(pred, instr, min, pred); min = MIN2(min, n); } @@ -204,7 +204,7 @@ delay_calc_srcn(struct ir3_block *block, } else { delay = ir3_delayslots(assigner, consumer, srcn); } - delay -= ir3_distance(block, assigner, delay, pred); + delay -= distance(block, assigner, delay, pred); } return delay; diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 4b95b905e20..458629021a4 100644 
--- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -553,6 +553,45 @@ mark_xvergence_points(struct ir3 *ir) } } +/* Insert the branch/jump instructions for flow control between blocks. + * Initially this is done naively, without considering if the successor + * block immediately follows the current block (ie. so no jump required), + * but that is cleaned up in resolve_jumps(). + * + * TODO what ensures that the last write to p0.x in a block is the + * branch condition? Have we been getting lucky all this time? + */ +static void +block_sched(struct ir3 *ir) +{ + foreach_block (block, &ir->block_list) { + if (block->successors[1]) { + /* if/else, conditional branches to "then" or "else": */ + struct ir3_instruction *br; + + debug_assert(block->condition); + + /* create "else" branch first (since "then" block should + * frequently/always end up being a fall-thru): + */ + br = ir3_BR(block, block->condition, 0); + br->cat0.inv = true; + br->cat0.target = block->successors[1]; + + /* "then" branch: */ + br = ir3_BR(block, block->condition, 0); + br->cat0.target = block->successors[0]; + + } else if (block->successors[0]) { + /* otherwise unconditional jump to next block: */ + struct ir3_instruction *jmp; + + jmp = ir3_JUMP(block); + jmp->cat0.target = block->successors[0]; + } + } +} + /* Insert nop's required to make this a legal/valid shader program: */ static void nop_sched(struct ir3 *ir) @@ -629,6 +668,7 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) *max_bary = ctx->max_bary; + block_sched(ir); nop_sched(ir); do { diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c index 13ec6e023ac..986c80e4940 100644 --- a/src/freedreno/ir3/ir3_sched.c +++ b/src/freedreno/ir3/ir3_sched.c @@ -761,48 +761,6 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) } } } - - /* And lastly, insert branch/jump instructions to take us to - * the next block. 
Later we'll strip back out the branches - * that simply jump to next instruction. - */ - if (block->successors[1]) { - /* if/else, conditional branches to "then" or "else": */ - struct ir3_instruction *br; - - debug_assert(ctx->pred); - debug_assert(block->condition); - - /* create "else" branch first (since "then" block should - * frequently/always end up being a fall-thru): - */ - br = ir3_BR(block); - br->cat0.inv = true; - br->cat0.target = block->successors[1]; - - /* NOTE: we have to hard code delay of 6 above, since - * we want to insert the nop's before constructing the - * branch. Throw in an assert so we notice if this - * ever breaks on future generation: - */ - debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6); - - br = ir3_BR(block); - br->cat0.target = block->successors[0]; - - } else if (block->successors[0]) { - /* otherwise unconditional jump to next block: */ - struct ir3_instruction *jmp; - - jmp = ir3_JUMP(block); - jmp->cat0.target = block->successors[0]; - } - - /* NOTE: if we kept track of the predecessors, we could do a better - * job w/ (jp) flags.. every node w/ > predecessor is a join point. - * Note that as we eliminate blocks which contain only an unconditional - * jump we probably need to propagate (jp) flag.. - */ } int ir3_sched(struct ir3 *ir) |