diff options
Diffstat (limited to 'src/gallium/drivers/lima/ir/gp/reduce_scheduler.c')
-rw-r--r-- | src/gallium/drivers/lima/ir/gp/reduce_scheduler.c | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c b/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c new file mode 100644 index 00000000000..f20768e12e4 --- /dev/null +++ b/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include <limits.h> + +#include "gpir.h" + +/* Register sensitive schedule algorithm from paper: + * "Register-Sensitive Selection, Duplication, and Sequencing of Instructions" + * Author: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons + */ + +static void schedule_calc_sched_info(gpir_node *node) +{ + int n = 0; + float extra_reg = 1.0f; + + /* update all children's sched info */ + gpir_node_foreach_pred(node, dep) { + gpir_node *pred = dep->pred; + + if (pred->rsched.reg_pressure < 0) + schedule_calc_sched_info(pred); + + int est = pred->rsched.est + 1; + if (node->rsched.est < est) + node->rsched.est = est; + + float reg_weight = 1.0f - 1.0f / list_length(&pred->succ_list); + if (extra_reg > reg_weight) + extra_reg = reg_weight; + + n++; + } + + /* leaf instr */ + if (!n) { + node->rsched.reg_pressure = 0; + return; + } + + int i = 0; + float reg[n]; + gpir_node_foreach_pred(node, dep) { + gpir_node *pred = dep->pred; + reg[i++] = pred->rsched.reg_pressure; + } + + /* sort */ + for (i = 0; i < n - 1; i++) { + for (int j = 0; j < n - i - 1; j++) { + if (reg[j] > reg[j + 1]) { + float tmp = reg[j + 1]; + reg[j + 1] = reg[j]; + reg[j] = tmp; + } + } + } + + for (i = 0; i < n; i++) { + float pressure = reg[i] + n - (i + 1); + if (pressure > node->rsched.reg_pressure) + node->rsched.reg_pressure = pressure; + } + + /* If all children of this node have multi parents, then this + * node need an extra reg to store its result. For example, + * it's not fair for parent has the same reg pressure as child + * if n==1 and child's successor>1, because we need 2 reg for + * this. + * + * But we can't add a full reg to the reg_pressure, because the + * last parent of a multi-successor child doesn't need an extra + * reg. For example, a single child (with multi successor) node + * should has less reg pressure than a two children (with single + * successor) instr. + * + * extra reg = min(all child)(1.0 - 1.0 / num successor) + */ + node->rsched.reg_pressure += extra_reg; +} + +static void schedule_insert_ready_list(struct list_head *ready_list, + gpir_node *insert_node) +{ + struct list_head *insert_pos = ready_list; + + list_for_each_entry(gpir_node, node, ready_list, list) { + if (insert_node->rsched.parent_index < node->rsched.parent_index || + (insert_node->rsched.parent_index == node->rsched.parent_index && + (insert_node->rsched.reg_pressure < node->rsched.reg_pressure || + (insert_node->rsched.reg_pressure == node->rsched.reg_pressure && + (insert_node->rsched.est >= node->rsched.est))))) { + insert_pos = &node->list; + break; + } + } + + list_del(&insert_node->list); + list_addtail(&insert_node->list, insert_pos); +} + +static void schedule_ready_list(gpir_block *block, struct list_head *ready_list) +{ + if (list_empty(ready_list)) + return; + + gpir_node *node = list_first_entry(ready_list, gpir_node, list); + list_del(&node->list); + + /* schedule the node to the block node list */ + list_add(&node->list, &block->node_list); + node->rsched.scheduled = true; + block->rsched.node_index--; + + gpir_node_foreach_pred(node, dep) { + gpir_node *pred = dep->pred; + pred->rsched.parent_index = block->rsched.node_index; + + bool ready = true; + gpir_node_foreach_succ(pred, dep) { + gpir_node *succ = dep->succ; + if (!succ->rsched.scheduled) { + ready = false; + break; + } + } + /* all successor have been scheduled */ + if (ready) + schedule_insert_ready_list(ready_list, pred); + } + + schedule_ready_list(block, ready_list); +} + +static void schedule_block(gpir_block *block) +{ + /* move all nodes to node_list, block->node_list will + * contain schedule result */ + struct list_head node_list; + list_replace(&block->node_list, &node_list); + list_inithead(&block->node_list); + + /* step 2 & 3 */ + list_for_each_entry(gpir_node, node, &node_list, list) { + if (gpir_node_is_root(node)) + schedule_calc_sched_info(node); + block->rsched.node_index++; + } + + /* step 4 */ + struct list_head ready_list; + list_inithead(&ready_list); + + /* step 5 */ + list_for_each_entry_safe(gpir_node, node, &node_list, list) { + if (gpir_node_is_root(node)) { + node->rsched.parent_index = INT_MAX; + schedule_insert_ready_list(&ready_list, node); + } + } + + /* step 6 */ + schedule_ready_list(block, &ready_list); +} + +bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp) +{ + /* No need to build physical reg load/store dependency here, + * because we just exit SSA form, there should be at most + * one load and one store pair for a physical reg within a + * block, and the store must be after load with the output + * of load as input after some calculation. So we don't need to + * insert extra write-after-read or read-after-write dependecy + * for load/store nodes to maintain the right sequence before + * scheduling. + * + * Also no need to handle SSA def/use in difference block, + * because we'll load/store SSA to a physical reg if def/use + * are not in the same block. + */ + + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + block->rsched.node_index = 0; + list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { + node->rsched.reg_pressure = -1; + node->rsched.est = 0; + node->rsched.scheduled = false; + } + } + + list_for_each_entry(gpir_block, block, &comp->block_list, list) { + schedule_block(block); + } + + gpir_debug("after reduce scheduler\n"); + gpir_node_print_prog_seq(comp); + return true; +} |