aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4/vc4_qpu_schedule.c
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2014-12-01 11:48:20 -0800
committer: Eric Anholt <[email protected]> 2014-12-01 22:29:42 -0800
commit: 29c7cf2b2ba850cf467167548d53383e1338fd5c (patch)
tree: ff978d0d716844b991b5e235483efbc53f2c9e73 /src/gallium/drivers/vc4/vc4_qpu_schedule.c
parent: 7b0067d23a6f64cf83c42e7f11b2cd4100c569fe (diff)
vc4: Pair up QPU instructions when scheduling.
We've got two mostly-independent operations in each QPU instruction, so try to pack two operations together. This is fairly naive (it doesn't track reads and writes separately in instructions, doesn't convert ADD-based MOVs into MUL-based MOVs, and doesn't reorder across uniform loads), but it does show a decent improvement on shader-db-2.

total instructions in shared programs: 59583 -> 57651 (-3.24%)
instructions in affected programs: 47361 -> 45429 (-4.08%)
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_qpu_schedule.c')
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_schedule.c 79
1 files changed, 62 insertions, 17 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index f309034fba7..8aa83741ff5 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -465,7 +465,8 @@ get_instruction_priority(uint64_t inst)
static struct schedule_node *
choose_instruction_to_schedule(struct choose_scoreboard *scoreboard,
- struct simple_node *schedule_list)
+ struct simple_node *schedule_list,
+ uint64_t prev_inst)
{
struct schedule_node *chosen = NULL;
struct simple_node *node;
@@ -490,6 +491,15 @@ choose_instruction_to_schedule(struct choose_scoreboard *scoreboard,
if (pixel_scoreboard_too_soon(scoreboard, inst))
continue;
+ /* If we're trying to pair with another instruction, check
+ * that they're compatible.
+ */
+ if (prev_inst != 0) {
+ inst = qpu_merge_inst(prev_inst, inst);
+ if (!inst)
+ continue;
+ }
+
int prio = get_instruction_priority(inst);
/* Found a valid instruction. If nothing better comes along,
@@ -571,6 +581,23 @@ compute_delay(struct schedule_node *n)
}
static void
+mark_instruction_scheduled(struct simple_node *schedule_list,
+ struct schedule_node *node)
+{ /* Release node's DAG children now that node itself has been scheduled. */
+ if (!node) /* callers may pass NULL (no instruction chosen / no merge partner) */
+ return;
+
+ for (int i = node->child_count - 1; i >= 0; i--) { /* walk children from last to first */
+ struct schedule_node *child =
+ node->children[i];
+
+ child->parent_count--; /* this parent no longer blocks the child */
+ if (child->parent_count == 0) /* no parents remain: make the child a scheduling candidate */
+ insert_at_head(schedule_list, &child->link);
+ }
+}
+
+static void
schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
{
struct simple_node *node, *t;
@@ -598,7 +625,9 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
while (!is_empty_list(schedule_list)) {
struct schedule_node *chosen =
choose_instruction_to_schedule(&scoreboard,
- schedule_list);
+ schedule_list,
+ 0);
+ struct schedule_node *merge = NULL;
/* If there are no valid instructions to schedule, drop a NOP
* in.
@@ -610,12 +639,38 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
dump_state(schedule_list);
fprintf(stderr, "chose: ");
vc4_qpu_disasm(&inst, 1);
- fprintf(stderr, "\n\n");
+ fprintf(stderr, "\n");
}
- /* Schedule this instruction onto the QPU list. */
- if (chosen)
+ /* Schedule this instruction onto the QPU list. Also try to
+ * find an instruction to pair with it.
+ */
+ if (chosen) {
remove_from_list(&chosen->link);
+
+ merge = choose_instruction_to_schedule(&scoreboard,
+ schedule_list,
+ inst);
+ if (merge) {
+ remove_from_list(&merge->link);
+ inst = qpu_merge_inst(inst, merge->inst->inst);
+ assert(inst != 0);
+
+ if (debug) {
+ fprintf(stderr, "merging: ");
+ vc4_qpu_disasm(&merge->inst->inst, 1);
+ fprintf(stderr, "\n");
+ fprintf(stderr, "resulting in: ");
+ vc4_qpu_disasm(&inst, 1);
+ fprintf(stderr, "\n");
+ }
+ }
+ }
+
+ if (debug) {
+ fprintf(stderr, "\n");
+ }
+
qpu_serialize_one_inst(c, inst);
update_scoreboard_for_chosen(&scoreboard, inst);
@@ -625,18 +680,8 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
* be scheduled. Update the children's unblocked time for this
* DAG edge as we do so.
*/
- if (chosen) {
- for (int i = chosen->child_count - 1; i >= 0; i--) {
- struct schedule_node *child =
- chosen->children[i];
-
- child->parent_count--;
- if (child->parent_count == 0) {
- insert_at_head(schedule_list,
- &child->link);
- }
- }
- }
+ mark_instruction_scheduled(schedule_list, chosen);
+ mark_instruction_scheduled(schedule_list, merge);
scoreboard.tick++;
}