aboutsummaryrefslogtreecommitdiffstats
path: root/src/panfrost
diff options
context:
space:
mode:
authorAlyssa Rosenzweig <[email protected]>2020-06-16 19:06:21 -0400
committerAlyssa Rosenzweig <[email protected]>2020-07-02 14:41:04 -0400
commit7b0a4f977b98cc49df5b2233e1674e63b05a7b25 (patch)
tree4c90f0e11f718bc81fc812800c3366c6e0e96d41 /src/panfrost
parenta6f0d7f003bc22e0668ea2ad97161dd4c079e594 (diff)
pan/mdg: Schedule based on liveness
By estimating liveness in the scheduler and choosing instructions likely to reduce register pressure, on average we can decrease pressure given a sufficiently larger window. On the other hand, decreasing pressure instead of leaning too heavily on the search window enables us to use a much larger search window without inflating pressure too much. So by doing both in lockstep, we benefit pretty well. total instructions in shared programs: 49458 -> 48540 (-1.86%) instructions in affected programs: 26931 -> 26013 (-3.41%) helped: 221 HURT: 15 helped stats (abs) min: 1 max: 36 x̄: 4.37 x̃: 2 helped stats (rel) min: 0.31% max: 16.90% x̄: 4.97% x̃: 3.85% HURT stats (abs) min: 1 max: 4 x̄: 3.13 x̃: 3 HURT stats (rel) min: 0.50% max: 7.14% x̄: 4.53% x̃: 4.55% 95% mean confidence interval for instructions value: -4.65 -3.13 95% mean confidence interval for instructions %-change: -4.94% -3.81% Instructions are helped. total bundles in shared programs: 25199 -> 23446 (-6.96%) bundles in affected programs: 21600 -> 19847 (-8.12%) helped: 277 HURT: 170 helped stats (abs) min: 1 max: 45 x̄: 7.33 x̃: 6 helped stats (rel) min: 1.06% max: 33.83% x̄: 11.01% x̃: 8.57% HURT stats (abs) min: 1 max: 6 x̄: 1.63 x̃: 1 HURT stats (rel) min: 1.19% max: 40.00% x̄: 13.36% x̃: 11.11% 95% mean confidence interval for bundles value: -4.61 -3.23 95% mean confidence interval for bundles %-change: -3.00% -0.49% Bundles are helped. 
total quadwords in shared programs: 40269 -> 39652 (-1.53%) quadwords in affected programs: 35881 -> 35264 (-1.72%) helped: 242 HURT: 244 helped stats (abs) min: 1 max: 36 x̄: 4.61 x̃: 3 helped stats (rel) min: 0.39% max: 16.33% x̄: 5.33% x̃: 5.13% HURT stats (abs) min: 1 max: 20 x̄: 2.04 x̃: 1 HURT stats (rel) min: 0.81% max: 21.74% x̄: 7.57% x̃: 6.25% 95% mean confidence interval for quadwords value: -1.71 -0.83 95% mean confidence interval for quadwords %-change: 0.46% 1.82% Inconclusive result (value mean confidence interval and %-change mean confidence interval disagree). total registers in shared programs: 3786 -> 3336 (-11.89%) registers in affected programs: 2161 -> 1711 (-20.82%) helped: 262 HURT: 35 helped stats (abs) min: 1 max: 7 x̄: 1.87 x̃: 1 helped stats (rel) min: 6.25% max: 66.67% x̄: 28.91% x̃: 25.00% HURT stats (abs) min: 1 max: 3 x̄: 1.11 x̃: 1 HURT stats (rel) min: 7.69% max: 100.00% x̄: 19.76% x̃: 12.50% 95% mean confidence interval for registers value: -1.70 -1.33 95% mean confidence interval for registers %-change: -25.56% -20.79% Registers are helped. total threads in shared programs: 2453 -> 2592 (5.67%) threads in affected programs: 160 -> 299 (86.87%) helped: 79 HURT: 6 helped stats (abs) min: 1 max: 2 x̄: 1.85 x̃: 2 helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00% HURT stats (abs) min: 1 max: 2 x̄: 1.17 x̃: 1 HURT stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% 95% mean confidence interval for threads value: 1.45 1.82 95% mean confidence interval for threads %-change: 81.08% 97.75% Threads are [helped]. total spills in shared programs: 168 -> 17 (-89.88%) spills in affected programs: 167 -> 16 (-90.42%) helped: 13 HURT: 0 total fills in shared programs: 186 -> 35 (-81.18%) fills in affected programs: 186 -> 35 (-81.18%) helped: 14 HURT: 0 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5513>
Diffstat (limited to 'src/panfrost')
-rw-r--r--src/panfrost/midgard/midgard_schedule.c107
1 files changed, 84 insertions, 23 deletions
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index 3aee91222ef..c5a4dea67f9 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -572,9 +572,56 @@ mir_has_unit(midgard_instruction *ins, unsigned unit)
return false;
}
+/* Net change in liveness if an instruction were scheduled. Loosely based on
+ * ir3's scheduler.
+ *
+ * liveness holds one 16-bit mask per node, indexed by node index; only
+ * indices below SSA_FIXED_MINIMUM are looked at. Bits of the destination
+ * that are currently set count as freed, and bits of the sources that are
+ * not yet set count as newly live (presumably because scheduling walks the
+ * block backwards -- TODO confirm against the caller).
+ *
+ * Returns newly-live bits minus freed bits, so a negative result means more
+ * bits would be freed than made live. If destructive is set, liveness is
+ * also updated in place as though the instruction had been scheduled;
+ * otherwise it is only read. */
+
+static int
+mir_live_effect(uint16_t *liveness, midgard_instruction *ins, bool destructive)
+{
+ /* Currently-set bits of the destination's mask that this write covers.
+  * TODO: what if dest is used multiple times? */
+ int free_live = 0;
+
+ if (ins->dest < SSA_FIXED_MINIMUM) {
+ unsigned bytemask = mir_bytemask(ins);
+ bytemask = util_next_power_of_two(bytemask + 1) - 1; /* presumably extends the mask down to bit 0 -- confirm helper semantics */
+ free_live += util_bitcount(liveness[ins->dest] & bytemask);
+
+ if (destructive)
+ liveness[ins->dest] &= ~bytemask; /* the covered bits stop being live */
+ }
+
+ int new_live = 0; /* bits the sources would turn on that are not already set */
+
+ mir_foreach_src(ins, s) {
+ unsigned S = ins->src[s];
+
+ bool dupe = false; /* becomes true if an earlier source slot names the same node */
+
+ for (unsigned q = 0; q < s; ++q)
+ dupe |= (ins->src[q] == S);
+
+ if (dupe)
+ continue; /* a node read through several slots is only counted once */
+
+ if (S < SSA_FIXED_MINIMUM) {
+ unsigned bytemask = mir_bytemask_of_read_components(ins, S);
+ bytemask = util_next_power_of_two(bytemask + 1) - 1; /* same widening as for the destination above */
+
+ /* Count only the new components */
+ new_live += util_bitcount(bytemask & ~(liveness[S]));
+
+ if (destructive)
+ liveness[S] |= bytemask; /* record the read bits as live */
+ }
+ }
+
+ return new_live - free_live;
+}
+
static midgard_instruction *
mir_choose_instruction(
midgard_instruction **instructions,
+ uint16_t *liveness,
BITSET_WORD *worklist, unsigned count,
struct midgard_predicate *predicate)
{
@@ -595,6 +642,7 @@ mir_choose_instruction(
unsigned i;
signed best_index = -1;
+ signed best_effect = INT_MAX;
bool best_conditional = false;
/* Enforce a simple metric limiting distance to keep down register
@@ -602,7 +650,7 @@ mir_choose_instruction(
* results */
unsigned max_active = 0;
- unsigned max_distance = 6;
+ unsigned max_distance = 36;
BITSET_FOREACH_SET(i, worklist, count) {
max_active = MAX2(max_active, i);
@@ -655,15 +703,19 @@ mir_choose_instruction(
if (conditional && no_cond)
continue;
- /* Simulate in-order scheduling */
- if ((signed) i < best_index)
+ int effect = mir_live_effect(liveness, instructions[i], false);
+
+ if (effect > best_effect)
+ continue;
+
+ if (effect == best_effect && (signed) i < best_index)
continue;
+ best_effect = effect;
best_index = i;
best_conditional = conditional;
}
-
/* Did we find anything? */
if (best_index < 0)
@@ -686,6 +738,7 @@ mir_choose_instruction(
/* Once we schedule a conditional, we can't again */
predicate->no_cond |= best_conditional;
+ mir_live_effect(liveness, instructions[best_index], true);
}
return instructions[best_index];
@@ -697,6 +750,7 @@ mir_choose_instruction(
static unsigned
mir_choose_bundle(
midgard_instruction **instructions,
+ uint16_t *liveness,
BITSET_WORD *worklist, unsigned count)
{
/* At the moment, our algorithm is very simple - use the bundle of the
@@ -709,7 +763,7 @@ mir_choose_bundle(
.exclude = ~0
};
- midgard_instruction *chosen = mir_choose_instruction(instructions, worklist, count, &predicate);
+ midgard_instruction *chosen = mir_choose_instruction(instructions, liveness, worklist, count, &predicate);
if (chosen)
return chosen->type;
@@ -721,6 +775,7 @@ mir_choose_bundle(
static void
mir_choose_alu(midgard_instruction **slot,
midgard_instruction **instructions,
+ uint16_t *liveness,
BITSET_WORD *worklist, unsigned len,
struct midgard_predicate *predicate,
unsigned unit)
@@ -731,7 +786,7 @@ mir_choose_alu(midgard_instruction **slot,
/* Try to schedule something, if not */
predicate->unit = unit;
- *slot = mir_choose_instruction(instructions, worklist, len, predicate);
+ *slot = mir_choose_instruction(instructions, liveness, worklist, len, predicate);
/* Store unit upon scheduling */
if (*slot && !((*slot)->compact_branch))
@@ -898,6 +953,7 @@ mir_schedule_condition(compiler_context *ctx,
static midgard_bundle
mir_schedule_texture(
midgard_instruction **instructions,
+ uint16_t *liveness,
BITSET_WORD *worklist, unsigned len,
bool is_vertex)
{
@@ -908,7 +964,7 @@ mir_schedule_texture(
};
midgard_instruction *ins =
- mir_choose_instruction(instructions, worklist, len, &predicate);
+ mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
mir_update_worklist(worklist, len, instructions, ins);
@@ -926,6 +982,7 @@ mir_schedule_texture(
static midgard_bundle
mir_schedule_ldst(
midgard_instruction **instructions,
+ uint16_t *liveness,
BITSET_WORD *worklist, unsigned len)
{
struct midgard_predicate predicate = {
@@ -937,10 +994,10 @@ mir_schedule_ldst(
/* Try to pick two load/store ops. Second not guaranteed to exist */
midgard_instruction *ins =
- mir_choose_instruction(instructions, worklist, len, &predicate);
+ mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
midgard_instruction *pair =
- mir_choose_instruction(instructions, worklist, len, &predicate);
+ mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
struct midgard_bundle out = {
.tag = TAG_LOAD_STORE_4,
@@ -962,6 +1019,7 @@ mir_schedule_zs_write(
compiler_context *ctx,
struct midgard_predicate *predicate,
midgard_instruction **instructions,
+ uint16_t *liveness,
BITSET_WORD *worklist, unsigned len,
midgard_instruction *branch,
midgard_instruction **smul,
@@ -985,7 +1043,7 @@ mir_schedule_zs_write(
predicate->unit = unit_names[i];
midgard_instruction *ins =
- mir_choose_instruction(instructions, worklist, len, predicate);
+ mir_choose_instruction(instructions, liveness, worklist, len, predicate);
if (ins) {
ins->unit = unit_names[i];
@@ -1028,6 +1086,7 @@ static midgard_bundle
mir_schedule_alu(
compiler_context *ctx,
midgard_instruction **instructions,
+ uint16_t *liveness,
BITSET_WORD *worklist, unsigned len)
{
struct midgard_bundle bundle = {};
@@ -1048,7 +1107,7 @@ mir_schedule_alu(
midgard_instruction *sadd = NULL;
midgard_instruction *branch = NULL;
- mir_choose_alu(&branch, instructions, worklist, len, &predicate, ALU_ENAB_BR_COMPACT);
+ mir_choose_alu(&branch, instructions, liveness, worklist, len, &predicate, ALU_ENAB_BR_COMPACT);
mir_update_worklist(worklist, len, instructions, branch);
unsigned writeout = branch ? branch->writeout : 0;
@@ -1123,19 +1182,19 @@ mir_schedule_alu(
}
if (writeout & PAN_WRITEOUT_Z)
- mir_schedule_zs_write(ctx, &predicate, instructions, worklist, len, branch, &smul, &vadd, &vlut, false);
+ mir_schedule_zs_write(ctx, &predicate, instructions, liveness, worklist, len, branch, &smul, &vadd, &vlut, false);
if (writeout & PAN_WRITEOUT_S)
- mir_schedule_zs_write(ctx, &predicate, instructions, worklist, len, branch, &smul, &vadd, &vlut, true);
+ mir_schedule_zs_write(ctx, &predicate, instructions, liveness, worklist, len, branch, &smul, &vadd, &vlut, true);
- mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL);
+ mir_choose_alu(&smul, instructions, liveness, worklist, len, &predicate, UNIT_SMUL);
for (unsigned moves = 0; moves < 2; ++moves) {
predicate.moves = moves;
predicate.no_mask = writeout ? (1 << 3) : 0;
- mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT);
+ mir_choose_alu(&vlut, instructions, liveness, worklist, len, &predicate, UNIT_VLUT);
predicate.no_mask = 0;
- mir_choose_alu(&vadd, instructions, worklist, len, &predicate, UNIT_VADD);
+ mir_choose_alu(&vadd, instructions, liveness, worklist, len, &predicate, UNIT_VADD);
}
mir_update_worklist(worklist, len, instructions, vlut);
@@ -1158,7 +1217,7 @@ mir_schedule_alu(
}
/* Stage 2, let's schedule sadd before vmul for writeout */
- mir_choose_alu(&sadd, instructions, worklist, len, &predicate, UNIT_SADD);
+ mir_choose_alu(&sadd, instructions, liveness, worklist, len, &predicate, UNIT_SADD);
/* Check if writeout reads its own register */
@@ -1191,7 +1250,7 @@ mir_schedule_alu(
predicate.mask = writeout_mask ^ full_mask;
struct midgard_instruction *peaked =
- mir_choose_instruction(instructions, worklist, len, &predicate);
+ mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
if (peaked) {
vmul = peaked;
@@ -1224,7 +1283,7 @@ mir_schedule_alu(
}
}
- mir_choose_alu(&vmul, instructions, worklist, len, &predicate, UNIT_VMUL);
+ mir_choose_alu(&vmul, instructions, liveness, worklist, len, &predicate, UNIT_VMUL);
mir_update_worklist(worklist, len, instructions, vmul);
mir_update_worklist(worklist, len, instructions, sadd);
@@ -1298,6 +1357,7 @@ schedule_block(compiler_context *ctx, midgard_block *block)
/* Allocate the worklist */
size_t sz = BITSET_WORDS(len) * sizeof(BITSET_WORD);
BITSET_WORD *worklist = calloc(sz, 1);
+ uint16_t *liveness = calloc(node_count, 2);
mir_initialize_worklist(worklist, instructions, len);
struct util_dynarray bundles;
@@ -1307,15 +1367,15 @@ schedule_block(compiler_context *ctx, midgard_block *block)
unsigned blend_offset = 0;
for (;;) {
- unsigned tag = mir_choose_bundle(instructions, worklist, len);
+ unsigned tag = mir_choose_bundle(instructions, liveness, worklist, len);
midgard_bundle bundle;
if (tag == TAG_TEXTURE_4)
- bundle = mir_schedule_texture(instructions, worklist, len, ctx->stage != MESA_SHADER_FRAGMENT);
+ bundle = mir_schedule_texture(instructions, liveness, worklist, len, ctx->stage != MESA_SHADER_FRAGMENT);
else if (tag == TAG_LOAD_STORE_4)
- bundle = mir_schedule_ldst(instructions, worklist, len);
+ bundle = mir_schedule_ldst(instructions, liveness, worklist, len);
else if (tag == TAG_ALU_4)
- bundle = mir_schedule_alu(ctx, instructions, worklist, len);
+ bundle = mir_schedule_alu(ctx, instructions, liveness, worklist, len);
else
break;
@@ -1360,6 +1420,7 @@ schedule_block(compiler_context *ctx, midgard_block *block)
free(instructions); /* Allocated by flatten_mir() */
free(worklist);
+ free(liveness);
}
void