author    | Rob Clark <[email protected]>  | 2020-05-06 10:29:01 -0700
committer | Marge Bot <[email protected]> | 2020-05-13 03:28:40 +0000
commit    | d6706fdc46fc895b3b3d4446af3d4a9ab1e700c0 (patch)
tree      | 67494a6fa3affa03550d31e938aa6f36efc09d06 /src
parent    | d95a6e3a0ca2d4a420306dd078cea05d3f21c865 (diff)
freedreno/ir3/sched: try to avoid syncs
Similar to what we do in postsched, it is useful for pre-RA sched to be
a bit aware of things that would cause syncs. This matters in particular
for tex fetches, since their vecN src/dst registers tend to limit
postsched's ability to re-order them.
Signed-off-by: Rob Clark <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4923>
Diffstat (limited to 'src')
-rw-r--r-- | src/freedreno/ir3/ir3_sched.c | 112
1 file changed, 99 insertions(+), 13 deletions(-)
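The core of the patch is a pair of countdown counters: scheduling an SFU instruction arms `sfu_delay` (to 8), a tex fetch or prefetch arms `tex_delay` (to 10), and every other scheduled instruction decays them; while a counter is live, candidates whose sources would force an (ss) or (sy) sync are deprioritized. Before the patch itself, here is a minimal standalone sketch of just that mechanism. The `instr` struct and its boolean fields are hypothetical, simplified stand-ins for the real `ir3_instruction` queries (`is_sfu()`, `is_tex_or_prefetch()`, `check_src_cond()`) shown in the diff below.

```c
#include <stdbool.h>

/* Hypothetical, simplified instruction: just the properties the
 * heuristic cares about.  The real pass inspects ir3_instruction
 * opcodes and sources instead.
 */
struct instr {
	bool is_sfu;      /* SFU op, e.g. rcp/rsq/log2 */
	bool is_tex;      /* tex fetch or prefetch */
	bool reads_sfu;   /* consumes an SFU result -> (ss) sync */
	bool reads_tex;   /* consumes a tex result  -> (sy) sync */
};

struct sched_state {
	int sfu_delay;    /* armed to 8 by an SFU op, decays per instr */
	int tex_delay;    /* armed to 10 by a tex fetch, decays per instr */
};

/* Called after each instruction is scheduled: arm, clear, or decay. */
static void update_delays(struct sched_state *s, const struct instr *i)
{
	if (i->is_sfu)
		s->sfu_delay = 8;     /* syncing right after this would be costly */
	else if (i->reads_sfu)
		s->sfu_delay = 0;     /* the sync already happened; stop avoiding */
	else if (s->sfu_delay > 0)
		s->sfu_delay--;       /* each instruction makes a sync cheaper */

	if (i->is_tex)
		s->tex_delay = 10;
	else if (i->reads_tex)
		s->tex_delay = 0;
	else if (s->tex_delay > 0)
		s->tex_delay--;
}

/* Candidate filter: prefer not to pick an instruction that would
 * force a sync while the corresponding counter is still live.
 */
static bool would_sync(const struct sched_state *s, const struct instr *i)
{
	return (s->sfu_delay > 0 && i->reads_sfu) ||
	       (s->tex_delay > 0 && i->reads_tex);
}
```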
```diff
diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c
index b4f1bc0a187..57567555fd4 100644
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@@ -93,8 +93,12 @@ struct ir3_sched_ctx {
 	struct ir3_instruction *pred;      /* current p0.x user, if any */
 
 	int remaining_kills;
+	int remaining_tex;
 
 	bool error;
+
+	int sfu_delay;
+	int tex_delay;
 };
 
 struct ir3_sched_node {
@@ -181,6 +185,7 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 	ctx->scheduled = instr;
 
 	if (is_kill(instr)){
+		assert(ctx->remaining_kills > 0);
 		ctx->remaining_kills--;
 	}
 
@@ -200,6 +205,32 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 	}
 
 	dag_prune_head(ctx->dag, &n->dag);
+
+	if (is_meta(instr) && (instr->opc != OPC_META_TEX_PREFETCH))
+		return;
+
+	if (is_sfu(instr)) {
+		ctx->sfu_delay = 8;
+	} else if (check_src_cond(instr, is_sfu)) {
+		ctx->sfu_delay = 0;
+	} else if (ctx->sfu_delay > 0) {
+		ctx->sfu_delay--;
+	}
+
+	if (is_tex_or_prefetch(instr)) {
+		/* NOTE that this isn't an attempt to hide texture fetch latency,
+		 * but an attempt to hide the cost of switching to another warp.
+		 * If we can, we'd like to try to schedule another texture fetch
+		 * before scheduling something that would sync.
+		 */
+		ctx->tex_delay = 10;
+		assert(ctx->remaining_tex > 0);
+		ctx->remaining_tex--;
+	} else if (check_src_cond(instr, is_tex_or_prefetch)) {
+		ctx->tex_delay = 0;
+	} else if (ctx->tex_delay > 0) {
+		ctx->tex_delay--;
+	}
 }
 
 struct ir3_sched_notes {
@@ -406,8 +437,35 @@ live_effect(struct ir3_instruction *instr)
 	return new_live - freed_live;
 }
 
-static struct ir3_sched_node * choose_instr_inc(struct ir3_sched_ctx *ctx,
-		struct ir3_sched_notes *notes, bool avoid_output);
+/* Determine if this is an instruction that we'd prefer not to schedule
+ * yet, in order to avoid an (ss)/(sy) sync.  This is limited by the
+ * sfu_delay/tex_delay counters, ie. the more cycles it has been since
+ * the last SFU/tex, the less costly a sync would be.
+ */
+static bool
+would_sync(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
+{
+	if (ctx->sfu_delay) {
+		if (check_src_cond(instr, is_sfu))
+			return true;
+	}
+
+	/* We mostly just want to try to schedule another texture fetch
+	 * before scheduling something that would (sy) sync, so we can
+	 * limit this rule to cases where there are remaining texture
+	 * fetches
+	 */
+	if (ctx->tex_delay && ctx->remaining_tex) {
+		if (check_src_cond(instr, is_tex_or_prefetch))
+			return true;
+	}
+
+	return false;
+}
+
+static struct ir3_sched_node *
+choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
+		bool avoid_sync, bool avoid_output);
 
 /**
  * Chooses an instruction to schedule using the Goodman/Hsu (1988) CSR (Code
@@ -417,12 +475,17 @@ static struct ir3_sched_node * choose_instr_inc(struct ir3_sched_ctx *ctx,
  * or are even.
  */
 static struct ir3_sched_node *
-choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
+choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
+		bool avoid_sync)
 {
+	const char *mode = avoid_sync ? "-as" : "";
 	struct ir3_sched_node *chosen = NULL;
 
 	/* Find a ready inst with regs freed and pick the one with max cost. */
 	foreach_sched_node (n, &ctx->dag->heads) {
+		if (avoid_sync && would_sync(ctx, n->instr))
+			continue;
+
 		unsigned d = ir3_delay_calc(ctx->block, n->instr, false, false);
 
 		if (d > 0)
@@ -440,12 +503,15 @@ choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	}
 
 	if (chosen) {
-		di(chosen->instr, "dec: chose (freed+ready)");
+		di(chosen->instr, "dec%s: chose (freed+ready)", mode);
 		return chosen;
 	}
 
 	/* Find a leader with regs freed and pick the one with max cost. */
 	foreach_sched_node (n, &ctx->dag->heads) {
+		if (avoid_sync && would_sync(ctx, n->instr))
+			continue;
+
 		if (live_effect(n->instr) > -1)
 			continue;
 
@@ -458,7 +524,7 @@ choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	}
 
 	if (chosen) {
-		di(chosen->instr, "dec: chose (freed)");
+		di(chosen->instr, "dec%s: chose (freed)", mode);
 		return chosen;
 	}
 
@@ -470,6 +536,9 @@ choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	 * XXX: Should this prioritize ready?
 	 */
 	foreach_sched_node (n, &ctx->dag->heads) {
+		if (avoid_sync && would_sync(ctx, n->instr))
+			continue;
+
 		unsigned d = ir3_delay_calc(ctx->block, n->instr, false, false);
 
 		if (d > 0)
@@ -486,11 +555,14 @@ choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	}
 
 	if (chosen) {
-		di(chosen->instr, "dec: chose (neutral+ready)");
+		di(chosen->instr, "dec%s: chose (neutral+ready)", mode);
 		return chosen;
 	}
 
 	foreach_sched_node (n, &ctx->dag->heads) {
+		if (avoid_sync && would_sync(ctx, n->instr))
+			continue;
+
 		if (live_effect(n->instr) > 0)
 			continue;
 
@@ -502,11 +574,11 @@ choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	}
 
 	if (chosen) {
-		di(chosen->instr, "dec: chose (neutral)");
+		di(chosen->instr, "dec%s: chose (neutral)", mode);
 		return chosen;
 	}
 
-	return choose_instr_inc(ctx, notes, true);
+	return choose_instr_inc(ctx, notes, avoid_sync, true);
 }
 
 /**
@@ -515,8 +587,9 @@ choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
  */
 static struct ir3_sched_node *
 choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
-		bool avoid_output)
+		bool avoid_sync, bool avoid_output)
 {
+	const char *mode = avoid_sync ? "-as" : "";
 	struct ir3_sched_node *chosen = NULL;
 
 	/*
@@ -531,6 +604,9 @@ choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 		if (avoid_output && n->output)
 			continue;
 
+		if (avoid_sync && would_sync(ctx, n->instr))
+			continue;
+
 		unsigned d = ir3_delay_calc(ctx->block, n->instr, false, false);
 
 		if (d > 0)
@@ -548,7 +624,7 @@ choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 	}
 
 	if (chosen) {
-		di(chosen->instr, "inc: chose (distance+ready)");
+		di(chosen->instr, "inc%s: chose (distance+ready)", mode);
 		return chosen;
 	}
 
@@ -557,6 +633,9 @@ choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 		if (avoid_output && n->output)
 			continue;
 
+		if (avoid_sync && would_sync(ctx, n->instr))
+			continue;
+
 		if (!check_instr(ctx, notes, n->instr))
 			continue;
 
@@ -569,7 +648,7 @@ choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 	}
 
 	if (chosen) {
-		di(chosen->instr, "inc: chose (distance)");
+		di(chosen->instr, "inc%s: chose (distance)", mode);
 		return chosen;
 	}
 
@@ -631,11 +710,15 @@ choose_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
 	if (chosen)
 		return chosen->instr;
 
-	chosen = choose_instr_dec(ctx, notes);
+	chosen = choose_instr_dec(ctx, notes, true);
+	if (chosen)
+		return chosen->instr;
+
+	chosen = choose_instr_dec(ctx, notes, false);
 	if (chosen)
 		return chosen->instr;
 
-	chosen = choose_instr_inc(ctx, notes, false);
+	chosen = choose_instr_inc(ctx, notes, false, false);
 	if (chosen)
 		return chosen->instr;
 
@@ -912,9 +995,12 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 	sched_dag_init(ctx);
 
 	ctx->remaining_kills = 0;
+	ctx->remaining_tex = 0;
 	foreach_instr_safe (instr, &ctx->unscheduled_list) {
 		if (is_kill(instr))
 			ctx->remaining_kills++;
+		if (is_tex_or_prefetch(instr))
+			ctx->remaining_tex++;
 	}
 
 	/* First schedule all meta:input instructions, followed by
```