aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorErico Nunes <[email protected]>2020-05-17 15:56:42 +0200
committerMarge Bot <[email protected]>2020-05-27 21:15:33 +0000
commitb3023055e075386e96fe2fbf093f0db261c0d9fa (patch)
tree4477443381c7151d31d174713fc5d6bbea5482e6
parent9ae8b4af75ea708323352c5c016dc4c72ba9c893 (diff)
lima/ppir: use a ready list in node_to_instr
After the recent optimizations in ppir lowering that increase options for combining, node_to_instr now may have multiple options of nodes to insert and needs to decide which is better. For example, if an instruction uses both a varying and a texture, there are two nodes nodes that can be inserted to the load varying slot in the same instruction (ld_var and ld_coords). It is much more advantageous to pipeline the load texture coords since that enables the higher precision path for texture coordinates. However, with the current recursive expansion, this cannot be influenced. This simple ready list implementation in node_to_instr allows it to choose the next node to expand based on a priority score, rather than relying on the random order coming from the recursive expansion. Other than preferring nodes with pipeline output (which covers ld_coords vs ld_var), nodes using later slots in the pipeline are now expanded first, allowing node_to_instr to make all of the earlier (pipelineable) nodes available in the ready list so the best one can be chosen when picking nodes for the earlier slots. Fixes: 632a921bd0d lima/ppir: optimize tex loads with single successor Signed-off-by: Erico Nunes <[email protected]> Reviewed-by: Vasily Khoruzhick <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5092>
-rw-r--r--src/gallium/drivers/lima/ir/pp/node_to_instr.c104
-rw-r--r--src/gallium/drivers/lima/ir/pp/ppir.h1
2 files changed, 77 insertions, 28 deletions
diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
index 9f21fa31e26..52632c88ce6 100644
--- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c
+++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
@@ -214,40 +214,88 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node)
return true;
}
-static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
+static unsigned int ppir_node_score(ppir_node *node)
{
- /* first try pipeline sched, if that didn't succeed try normal scheduling */
- if (!ppir_do_node_to_instr_try_insert(block, node))
- if (!ppir_do_one_node_to_instr(block, node))
- return false;
+ /* preferentially expand nodes in later instruction slots first, so
+ * nodes for earlier slots (which are more likely pipelineable) get
+ * added to the ready list. */
+ unsigned int late_slot = 0;
+ int *slots = ppir_op_infos[node->op].slots;
+ if (slots)
+ for (int i = 0; slots[i] != PPIR_INSTR_SLOT_END; i++)
+ late_slot = MAX2(late_slot, slots[i]);
+
+ /* to untie, favour nodes with pipelines for earlier expansion.
+ * increase that for nodes with chained pipelines */
+ unsigned int pipeline = 0;
+ ppir_node *n = node;
+ ppir_dest *dest = ppir_node_get_dest(n);
+ while (dest && dest->type == ppir_target_pipeline) {
+ pipeline++;
+ assert(ppir_node_has_single_src_succ(n));
+ n = ppir_node_first_succ(n);
+ dest = ppir_node_get_dest(n);
+ }
+ assert(pipeline < 4);
- if (node->is_end)
- node->instr->is_end = true;
+ return (late_slot << 2 | pipeline);
+}
- /* we have to make sure the dep not be destroyed (due to
- * succ change) in ppir_do_node_to_instr, otherwise we can't
- * do recursion like this */
- ppir_node_foreach_pred(node, dep) {
- ppir_node *pred = dep->pred;
- bool ready = true;
-
- /* pred may already be processed by the previous pred
- * (this pred may be both node and previous pred's child) */
- if (pred->instr)
- continue;
-
- /* insert pred only when all its successors have been inserted */
- ppir_node_foreach_succ(pred, dep) {
- ppir_node *succ = dep->succ;
- if (!succ->instr) {
- ready = false;
- break;
- }
+static ppir_node *ppir_ready_list_pick_best(ppir_block *block,
+ struct list_head *ready_list)
+{
+ unsigned int best_score = 0;
+ ppir_node *best = NULL;
+
+ list_for_each_entry(ppir_node, node, ready_list, sched_list) {
+ unsigned int score = ppir_node_score(node);
+ if (!best || score > best_score) {
+ best = node;
+ best_score = score;
}
+ }
- if (ready) {
- if (!ppir_do_node_to_instr(block, pred))
+ assert(best);
+ return best;
+}
+
+static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *root)
+{
+ struct list_head ready_list;
+ list_inithead(&ready_list);
+ list_addtail(&root->sched_list, &ready_list);
+
+ while (!list_is_empty(&ready_list)) {
+ ppir_node *node = ppir_ready_list_pick_best(block, &ready_list);
+ list_del(&node->sched_list);
+
+ /* first try pipeline sched, if that didn't succeed try normal sched */
+ if (!ppir_do_node_to_instr_try_insert(block, node))
+ if (!ppir_do_one_node_to_instr(block, node))
return false;
+
+ if (node->is_end)
+ node->instr->is_end = true;
+
+ ppir_node_foreach_pred(node, dep) {
+ ppir_node *pred = dep->pred;
+ bool ready = true;
+
+ /* pred may already have been processed by a previous node */
+ if (pred->instr)
+ continue;
+
+ /* insert pred only when all its successors have been inserted */
+ ppir_node_foreach_succ(pred, dep) {
+ ppir_node *succ = dep->succ;
+ if (!succ->instr) {
+ ready = false;
+ break;
+ }
+ }
+
+ if (ready)
+ list_addtail(&pred->sched_list, &ready_list);
}
}
diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h
index 631dcaaad51..8344a0038d6 100644
--- a/src/gallium/drivers/lima/ir/pp/ppir.h
+++ b/src/gallium/drivers/lima/ir/pp/ppir.h
@@ -152,6 +152,7 @@ typedef struct {
typedef struct ppir_node {
struct list_head list;
+ struct list_head sched_list;
ppir_op op;
ppir_node_type type;
int index;