author    | Alyssa Rosenzweig <[email protected]> | 2019-08-30 12:56:55 -0700
committer | Alyssa Rosenzweig <[email protected]> | 2019-08-30 15:50:28 -0700
commit    | d699a17475b5d123e6a22778e8ac6e005774ce92 (patch)
tree      | 4c52e52fe69d0719bebc6c511ac14f9f26cf7c08
parent    | 5e06d90c4510eb3a8c42b0e0d1a3ebfd19830069 (diff)
pan/midgard: Schedule before RA
This is a tradeoff.
Scheduling before RA means we don't do RA on what-will-become pipeline
registers. Importantly, it means the scheduler is able to reorder
instructions, as registers have not been decided yet.
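To make the reordering point concrete, here is a tiny self-contained toy
(plain C, not Midgard compiler code; the struct and the hazard check are
invented for illustration): two writes to distinct virtual registers carry
no output dependence, but once RA packs both values into the same physical
register, a false dependence appears and the swap is no longer legal.

#include <stdbool.h>
#include <stdio.h>

/* Toy "instruction": all we track is which register it writes */
struct toy_ins {
        unsigned dest;
};

/* Two instructions may be swapped only if they write different registers
 * (reads are ignored for brevity) */
static bool
can_reorder(const struct toy_ins *a, const struct toy_ins *b)
{
        return a->dest != b->dest;
}

int
main(void)
{
        /* Before RA: distinct virtual names, freely reorderable */
        struct toy_ins a = { .dest = 7 }, b = { .dest = 9 };
        printf("pre-RA:  can_reorder = %d\n", can_reorder(&a, &b));

        /* After RA: both values packed into r0, so a false (output)
         * dependence now blocks the scheduler */
        a.dest = 0;
        b.dest = 0;
        printf("post-RA: can_reorder = %d\n", can_reorder(&a, &b));
        return 0;
}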
Unfortunately, it also complicates register spilling, since the spills
themselves won't get bundled optimally, and as implemented here only one
spill per ALU bundle is possible (using r27 in addition to r26 would
allow two, but that optimization is left undone). It also prevents us
from eliminating dead moves introduced by register allocation, as they
are not dead before RA. The shader-db regressions are from poor spilling
choices introduced by the new bundling requirements. These could be
solved by the combination of a post-scheduler (to combine adjacent
spills into bundles) with a VLIW-aware spill cost calculation.
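As a rough sketch of what a VLIW-aware spill cost could look like
(hypothetical, not part of this patch: the helper name and the weighting are
invented, while compiler_context, the mir_foreach_* iterators, and
ra_set_node_spill_cost are the ones visible in the diff below), count how
many bundles write each node, since each such bundle needs its own spill
store once spilling is bundle-granular, and make heavily-written nodes
costlier to spill:

static void
mir_set_vliw_spill_costs(compiler_context *ctx, struct ra_graph *g)
{
        /* Hypothetical: count the bundles writing each node */
        unsigned *writes = calloc(ctx->temp_count, sizeof(unsigned));

        mir_foreach_block(ctx, block) {
                mir_foreach_bundle_in_block(block, bun) {
                        for (unsigned i = 0; i < bun->instruction_count; ++i) {
                                unsigned dest = bun->instructions[i]->dest;

                                if (dest < ctx->temp_count)
                                        writes[dest]++;
                        }
                }
        }

        /* More writing bundles => more per-bundle spill stores => a less
         * attractive spill candidate */
        for (unsigned i = 0; i < ctx->temp_count; ++i)
                ra_set_node_spill_cost(g, i, 1.0 + writes[i]);

        free(writes);
}

Something along these lines would replace the flat 1.0 cost that
mir_spill_register currently assigns to every node (visible as context in
the first hunk below), running before the pass that marks unspillable
bundles with -1.0.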
Nevertheless, the change is small enough that I feel it's worth it to
eat a tiny shader-db regression for the sake of flexibility.
Signed-off-by: Alyssa Rosenzweig <[email protected]>
-rw-r--r-- | src/panfrost/midgard/midgard_schedule.c | 56
1 file changed, 29 insertions, 27 deletions
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index 6693a1b725b..8f86701e33f 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -798,11 +798,26 @@ static void mir_spill_register(
                 ra_set_node_spill_cost(g, i, 1.0);
         }
 
-        mir_foreach_instr_global(ctx, ins) {
-                if (ins->no_spill &&
-                    ins->dest >= 0 &&
-                    ins->dest < ctx->temp_count)
-                        ra_set_node_spill_cost(g, ins->dest, -1.0);
+        /* We can't spill any bundles that contain unspills. This could be
+         * optimized to allow use of r27 to spill twice per bundle, but if
+         * you're at the point of optimizing spilling, it's too late. */
+
+        mir_foreach_block(ctx, block) {
+                mir_foreach_bundle_in_block(block, bun) {
+                        bool no_spill = false;
+
+                        for (unsigned i = 0; i < bun->instruction_count; ++i)
+                                no_spill |= bun->instructions[i]->no_spill;
+
+                        if (!no_spill)
+                                continue;
+
+                        for (unsigned i = 0; i < bun->instruction_count; ++i) {
+                                unsigned dest = bun->instructions[i]->dest;
+                                if (dest < ctx->temp_count)
+                                        ra_set_node_spill_cost(g, dest, -1.0);
+                        }
+                }
         }
 
         int spill_node = ra_get_best_spill_node(g);
@@ -831,7 +846,8 @@ static void mir_spill_register(
                 if (is_special_w)
                         spill_slot = spill_index++;
 
-                mir_foreach_instr_global_safe(ctx, ins) {
+                mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block_safe(block, ins) {
                         if (ins->dest != spill_node) continue;
 
                         midgard_instruction st;
@@ -841,17 +857,19 @@ static void mir_spill_register(
                                 st.no_spill = true;
                         } else {
                                 ins->dest = SSA_FIXED_REGISTER(26);
+                                ins->no_spill = true;
                                 st = v_load_store_scratch(ins->dest, spill_slot, true, ins->mask);
                         }
 
                         /* Hint: don't rewrite this node */
                         st.hint = true;
 
-                        mir_insert_instruction_before(ctx, mir_next_op(ins), st);
+                        mir_insert_instruction_after_scheduled(ctx, block, ins, st);
 
                         if (!is_special)
                                 ctx->spills++;
                 }
+                }
         }
 
         /* For special reads, figure out how many components we need */
@@ -915,7 +933,7 @@ static void mir_spill_register(
                         st.mask = read_mask;
 
-                        mir_insert_instruction_before(ctx, before, st);
+                        mir_insert_instruction_before_scheduled(ctx, block, before, st);
                         // consecutive_skip = true;
                 } else {
                         /* Special writes already have their move spilled in */
@@ -962,8 +980,11 @@ schedule_program(compiler_context *ctx)
 
         mir_foreach_block(ctx, block) {
                 midgard_opt_dead_move_eliminate(ctx, block);
+                schedule_block(ctx, block);
         }
 
+        mir_create_pipeline_registers(ctx);
+
         do {
                 if (spilled)
                         mir_spill_register(ctx, g, &spill_count);
@@ -974,25 +995,6 @@ schedule_program(compiler_context *ctx)
                 g = allocate_registers(ctx, &spilled);
         } while(spilled && ((iter_count--) > 0));
 
-        /* We can simplify a bit after RA */
-
-        mir_foreach_block(ctx, block) {
-                midgard_opt_post_move_eliminate(ctx, block, g);
-        }
-
-        /* After RA finishes, we schedule all at once */
-
-        mir_foreach_block(ctx, block) {
-                schedule_block(ctx, block);
-        }
-
-        /* Finally, we create pipeline registers as a peephole pass after
-         * scheduling. This isn't totally optimal, since there are cases where
-         * the usage of pipeline registers can eliminate spills, but it does
-         * save some power */
-
-        mir_create_pipeline_registers(ctx);
-
         if (iter_count <= 0) {
                 fprintf(stderr, "panfrost: Gave up allocating registers, rendering will be incomplete\n");
                 assert(0);
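For readers skimming the hunks, the resulting pass order in schedule_program()
after this change looks roughly like the following (a simplified view assembled
only from the hunks above; unchanged code between them is elided):

mir_foreach_block(ctx, block) {
        midgard_opt_dead_move_eliminate(ctx, block);
        schedule_block(ctx, block);          /* schedule before RA */
}

mir_create_pipeline_registers(ctx);          /* peephole over the schedule */

do {
        if (spilled)
                mir_spill_register(ctx, g, &spill_count);

        /* ... unchanged code elided ... */

        g = allocate_registers(ctx, &spilled);
} while (spilled && ((iter_count--) > 0));

The post-RA passes that used to follow the loop are gone: scheduling and
pipeline-register creation now run up front, and midgard_opt_post_move_eliminate
is dropped entirely, matching the commit message's note that dead moves
introduced by RA can no longer be eliminated.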