author	Rob Clark <[email protected]>	2019-02-27 09:56:18 -0500
committer	Rob Clark <[email protected]>	2019-03-03 13:27:50 -0500
commit	00f838fa730f5c765902fe2e5ce9754df5276e91 (patch)
tree	9b855a29283e348829a004644baf3222d257e91c /src
parent	8a5f2d9444879dc4c8b2b1f192b2a667a1278a2b (diff)
freedreno/ir3: track register pressure in sched
Not a perfect solution, and the "pressure" target is hard-coded. But it doesn't really seem to hurt much in the common case, and avoids exploding register usage in dEQP ssbo tests. So this should serve as a stop-gap solution until I have time to rewrite the scheduler.

Hurts slightly in instruction count, but slightly reduces register usage in shader-db.

Fixes ~150 dEQP-GLES31.functional.ssbo.* tests that were failing due to RA failures.

Signed-off-by: Rob Clark <[email protected]>
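For orientation, below is a minimal, self-contained sketch of the candidate-selection heuristic this patch introduces. It is simplified from the ir3_sched.c hunk further down; the struct candidate type, the pick_candidate() helper, and the plain-int fields are illustrative stand-ins rather than the actual ir3 types. While the live-value estimate stays under the hard-coded budget of 16*4 scalar register components, selection still minimizes delay (breaking near-ties, within two cycles, in favor of the candidate with the larger "sun" pressure weight); once over budget, only the pressure weight matters. A companion sketch of the live-value bookkeeping follows the full diff.

/*
 * Simplified model of the selection logic added to find_eligible_instr()
 * in the diff below.  Types and helper names here are illustrative only.
 */
#include <limits.h>
#include <stddef.h>

struct candidate {
	int delay;   /* estimated cycles we would stall before issuing it */
	int sun;     /* per-instruction pressure weight; larger is preferred
	              * when over the register-pressure budget */
};

static const struct candidate *
pick_candidate(const struct candidate *cands, size_t n, int live_values)
{
	const struct candidate *best = NULL;
	int min_delay = INT_MAX;

	for (size_t i = 0; i < n; i++) {
		const struct candidate *c = &cands[i];

		if (live_values > 16 * 4) {
			/* under register pressure, only care about reducing
			 * live values: */
			if (!best || (c->sun > best->sun))
				best = c;
		} else {
			/* normal case: minimize stalls, but break near-ties
			 * (within two cycles) toward the larger weight: */
			if ((c->delay < min_delay) ||
			    (best && (c->delay <= min_delay + 2) &&
			     (c->sun > best->sun))) {
				best = c;
				min_delay = c->delay;
			}
		}
	}
	return best;
}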
Diffstat (limited to 'src')
-rw-r--r--	src/freedreno/ir3/ir3_depth.c	 2
-rw-r--r--	src/freedreno/ir3/ir3_sched.c	97
2 files changed, 90 insertions(+), 9 deletions(-)
diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c
index e0500b43161..071b5e3c528 100644
--- a/src/freedreno/ir3/ir3_depth.c
+++ b/src/freedreno/ir3/ir3_depth.c
@@ -90,7 +90,7 @@ int ir3_delayslots(struct ir3_instruction *assigner,
* handled with sync bits
*/
- if (is_meta(assigner))
+ if (is_meta(assigner) || is_meta(consumer))
return 0;
if (writes_addr(assigner))
diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c
index 6552980d90c..4380fdf2f9a 100644
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@@ -55,6 +55,7 @@ struct ir3_sched_ctx {
struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/
struct ir3_instruction *addr; /* current a0.x user, if any */
struct ir3_instruction *pred; /* current p0.x user, if any */
+ int live_values; /* estimate of current live values */
bool error;
};
@@ -63,6 +64,77 @@ static bool is_sfu_or_mem(struct ir3_instruction *instr)
return is_sfu(instr) || is_mem(instr);
}
+static void
+unuse_each_src(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
+{
+ struct ir3_instruction *src;
+
+ foreach_ssa_src_n(src, n, instr) {
+ if (__is_false_dep(instr, n))
+ continue;
+ if (instr->block != src->block)
+ continue;
+ if ((src->opc == OPC_META_FI) || (src->opc == OPC_META_FO)) {
+ unuse_each_src(ctx, src);
+ } else {
+ debug_assert(src->use_count > 0);
+
+ if (--src->use_count == 0) {
+ ctx->live_values -= dest_regs(src);
+ debug_assert(ctx->live_values >= 0);
+ }
+ }
+ }
+}
+
+static void
+use_each_src(struct ir3_instruction *instr)
+{
+ struct ir3_instruction *src;
+
+ foreach_ssa_src_n(src, n, instr) {
+ if (__is_false_dep(instr, n))
+ continue;
+ if (instr->block != src->block)
+ continue;
+ if ((src->opc == OPC_META_FI) || (src->opc == OPC_META_FO)) {
+ use_each_src(src);
+ } else {
+ src->use_count++;
+ }
+ }
+}
+
+static void
+update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
+{
+ if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
+ return;
+
+ ctx->live_values += dest_regs(instr);
+ unuse_each_src(ctx, instr);
+}
+
+/* This is *slightly* different than how ir3_cp uses use_count, in that
+ * we just track it per block (because we schedule a block at a time) and
+ * because we don't track meta instructions and false dependencies (since
+ * they don't contribute real register pressure).
+ */
+static void
+update_use_count(struct ir3_block *block)
+{
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ instr->use_count = 0;
+ }
+
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
+ continue;
+
+ use_each_src(instr);
+ }
+}
+
#define NULL_INSTR ((void *)~0)
static void
@@ -105,6 +177,8 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
list_addtail(&instr->node, &instr->block->instr_list);
ctx->scheduled = instr;
+ update_live_values(ctx, instr);
+
if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
clear_cache(ctx, NULL);
} else {
@@ -126,7 +200,7 @@ deepest(struct ir3_instruction **srcs, unsigned nsrcs)
return NULL;
for (; i < nsrcs; i++)
- if (srcs[i] && (srcs[i]->depth > d->depth))
+ if (srcs[i] && (srcs[i]->sun > d->sun))
d = srcs[id = i];
srcs[id] = NULL;
@@ -432,14 +506,18 @@ find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
if (!candidate)
continue;
- delay = delay_calc(ctx->block, candidate, soft, false);
- if (delay < min_delay) {
- best_instr = candidate;
- min_delay = delay;
+ if (ctx->live_values > 16*4) {
+ /* under register pressure, only care about reducing live values: */
+ if (!best_instr || (candidate->sun > best_instr->sun))
+ best_instr = candidate;
+ } else {
+ delay = delay_calc(ctx->block, candidate, soft, false);
+ if ((delay < min_delay) ||
+ ((delay <= (min_delay + 2)) && (candidate->sun > best_instr->sun))) {
+ best_instr = candidate;
+ min_delay = delay;
+ }
}
-
- if (min_delay == 0)
- break;
}
return best_instr;
@@ -714,6 +792,8 @@ int ir3_sched(struct ir3 *ir)
ir3_clear_mark(ir);
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ ctx.live_values = 0;
+ update_use_count(block);
sched_block(&ctx, block);
}
@@ -723,6 +803,7 @@ int ir3_sched(struct ir3 *ir)
if (ctx.error)
return -1;
+
return 0;
}
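As a companion to the diff above, here is a hedged, self-contained sketch of the live-value bookkeeping the patch adds. The toy_instr type and the NULL-terminated srcs array are stand-ins; the real code additionally skips false dependencies and cross-block sources and recurses through the OPC_META_FI/OPC_META_FO meta instructions. Before scheduling a block, every instruction gets a per-block use count; when an instruction is scheduled, its destination registers are added to the live estimate, and each source whose remaining use count drops to zero gives its registers back.

/*
 * Toy model of the per-block live-value estimate; not the actual ir3 data
 * structures, and meta instructions / false deps are ignored here.
 */
#include <assert.h>
#include <stddef.h>

struct toy_instr {
	int dest_regs;               /* registers this instruction writes */
	unsigned use_count;          /* unscheduled users left in this block */
	struct toy_instr *srcs[4];   /* SSA sources, NULL-terminated */
};

/* One pass over the block before scheduling: count users per source. */
static void
count_uses(struct toy_instr **block, size_t n)
{
	for (size_t i = 0; i < n; i++)
		block[i]->use_count = 0;
	for (size_t i = 0; i < n; i++)
		for (struct toy_instr **s = block[i]->srcs; *s; s++)
			(*s)->use_count++;
}

/* Called as each instruction is scheduled: its defs become live, and any
 * source whose last use this was stops being live. */
static void
update_live_estimate(int *live_values, struct toy_instr *instr)
{
	*live_values += instr->dest_regs;
	for (struct toy_instr **s = instr->srcs; *s; s++) {
		assert((*s)->use_count > 0);
		if (--(*s)->use_count == 0) {
			*live_values -= (*s)->dest_regs;
			assert(*live_values >= 0);
		}
	}
}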