aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/lima/ir/gp/lower.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/lima/ir/gp/lower.c')
-rw-r--r--src/gallium/drivers/lima/ir/gp/lower.c529
1 files changed, 529 insertions, 0 deletions
diff --git a/src/gallium/drivers/lima/ir/gp/lower.c b/src/gallium/drivers/lima/ir/gp/lower.c
new file mode 100644
index 00000000000..b52cb38347b
--- /dev/null
+++ b/src/gallium/drivers/lima/ir/gp/lower.c
@@ -0,0 +1,529 @@
+/*
+ * Copyright (c) 2017 Lima Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "util/ralloc.h"
+
+#include "gpir.h"
+#include "lima_context.h"
+
+static gpir_node *
+gpir_lower_create_insert_node(gpir_node *parent, gpir_node *child,
+ gpir_node *child2, gpir_op op)
+{
+ gpir_node *node = gpir_node_create(parent->block, op);
+ if (!node)
+ return NULL;
+
+ gpir_alu_node *alu = gpir_node_to_alu(node);
+ alu->children[0] = child;
+ alu->children[1] = child2;
+ alu->num_child = 2;
+ gpir_node_insert_child(parent, child, node);
+ gpir_node_add_dep(node, child2, GPIR_DEP_INPUT);
+ list_addtail(&node->list, &parent->list);
+ return node;
+}
+
+static bool gpir_lower_viewport_transform(gpir_compiler *comp)
+{
+ gpir_node *rcpw = NULL;
+
+ /* rcpw = 1 / w */
+ list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+ list_for_each_entry(gpir_node, node, &block->node_list, list) {
+ if (node->op == gpir_op_store_varying) {
+ gpir_store_node *store = gpir_node_to_store(node);
+ if (store->index == 0 && store->component == 3) {
+ gpir_node *w = store->child;
+
+ rcpw = gpir_node_create(block, gpir_op_rcp);
+ if (!rcpw)
+ return false;
+ list_addtail(&rcpw->list, &node->list);
+
+ gpir_alu_node *alu = gpir_node_to_alu(rcpw);
+ alu->children[0] = w;
+ alu->num_child = 1;
+ store->child = rcpw;
+
+ gpir_node_insert_child(node, w, rcpw);
+ goto found;
+ }
+ }
+ }
+ }
+
+found:
+ assert(rcpw);
+
+ /* xyz = xyz * rcpw * scale + transition */
+ list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+ list_for_each_entry(gpir_node, node, &block->node_list, list) {
+ if (node->op == gpir_op_store_varying) {
+ gpir_store_node *store = gpir_node_to_store(node);
+ if (store->index == 0 && store->component < 3) {
+ gpir_node *xyz = store->child;
+
+ gpir_node *mul1 =
+ gpir_lower_create_insert_node(node, xyz, rcpw, gpir_op_mul);
+ if (!mul1)
+ return false;
+
+ gpir_load_node *scale = gpir_node_create(block, gpir_op_load_uniform);
+ if (!scale)
+ return false;
+ scale->index = comp->constant_base;
+ scale->component = store->component;
+ list_addtail(&scale->node.list, &node->list);
+
+ gpir_node *mul2 =
+ gpir_lower_create_insert_node(node, mul1, &scale->node, gpir_op_mul);
+ if (!mul2)
+ return false;
+
+ gpir_load_node *translate = gpir_node_create(block, gpir_op_load_uniform);
+ if (!translate)
+ return false;
+ translate->index = comp->constant_base + 1;
+ translate->component = store->component;
+ list_addtail(&translate->node.list, &node->list);
+
+ gpir_node *add =
+ gpir_lower_create_insert_node(node, mul2, &translate->node, gpir_op_add);
+ if (!add)
+ return false;
+
+ store->child = add;
+ }
+ }
+ }
+ }
+
+ comp->constant_base += 2;
+ return true;
+}
+
+static bool gpir_lower_const(gpir_compiler *comp)
+{
+ int num_constant = 0;
+ list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+ list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
+ if (node->op == gpir_op_const) {
+ if (gpir_node_is_root(node))
+ gpir_node_delete(node);
+ else
+ num_constant++;
+ }
+ }
+ }
+
+ if (num_constant) {
+ union fi *constant = ralloc_array(comp->prog, union fi, num_constant);
+ if (!constant)
+ return false;
+
+ comp->prog->constant = constant;
+ comp->prog->constant_size = num_constant * sizeof(union fi);
+
+ int index = 0;
+ list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+ list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
+ if (node->op == gpir_op_const) {
+ gpir_const_node *c = gpir_node_to_const(node);
+
+ if (!gpir_node_is_root(node)) {
+ gpir_load_node *load = gpir_node_create(block, gpir_op_load_uniform);
+ if (unlikely(!load))
+ return false;
+
+ load->index = comp->constant_base + (index >> 2);
+ load->component = index % 4;
+ constant[index++] = c->value;
+
+ gpir_node_replace_succ(&load->node, node);
+
+ list_addtail(&load->node.list, &node->list);
+
+ gpir_debug("lower const create uniform %d for const %d\n",
+ load->node.index, node->index);
+ }
+
+ gpir_node_delete(node);
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/* duplicate load to all its successors */
+static bool gpir_lower_load(gpir_compiler *comp)
+{
+ list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+ list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
+ if (node->type == gpir_node_type_load) {
+ gpir_load_node *load = gpir_node_to_load(node);
+
+ bool first = true;
+ gpir_node_foreach_succ_safe(node, dep) {
+ gpir_node *succ = dep->succ;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ gpir_node *new = gpir_node_create(succ->block, node->op);
+ if (unlikely(!new))
+ return false;
+ list_addtail(&new->list, &succ->list);
+
+ gpir_debug("lower load create %d from %d for succ %d\n",
+ new->index, node->index, succ->index);
+
+ gpir_load_node *nload = gpir_node_to_load(new);
+ nload->index = load->index;
+ nload->component = load->component;
+ if (load->reg) {
+ nload->reg = load->reg;
+ list_addtail(&nload->reg_link, &load->reg->uses_list);
+ }
+
+ gpir_node_replace_pred(dep, new);
+ gpir_node_replace_child(succ, node, new);
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+static bool gpir_lower_neg(gpir_block *block, gpir_node *node)
+{
+ gpir_alu_node *neg = gpir_node_to_alu(node);
+ gpir_node *child = neg->children[0];
+
+ /* check if child can dest negate */
+ if (child->type == gpir_node_type_alu) {
+ /* negate must be its only successor */
+ if (list_is_singular(&child->succ_list) &&
+ gpir_op_infos[child->op].dest_neg) {
+ gpir_alu_node *alu = gpir_node_to_alu(child);
+ alu->dest_negate = !alu->dest_negate;
+
+ gpir_node_replace_succ(child, node);
+ gpir_node_delete(node);
+ return true;
+ }
+ }
+
+ /* check if child can src negate */
+ gpir_node_foreach_succ_safe(node, dep) {
+ gpir_node *succ = dep->succ;
+ if (succ->type != gpir_node_type_alu)
+ continue;
+
+ bool success = true;
+ gpir_alu_node *alu = gpir_node_to_alu(dep->succ);
+ for (int i = 0; i < alu->num_child; i++) {
+ if (alu->children[i] == node) {
+ if (gpir_op_infos[succ->op].src_neg[i]) {
+ alu->children_negate[i] = !alu->children_negate[i];
+ alu->children[i] = child;
+ }
+ else
+ success = false;
+ }
+ }
+
+ if (success)
+ gpir_node_replace_pred(dep, child);
+ }
+
+ if (gpir_node_is_root(node))
+ gpir_node_delete(node);
+
+ return true;
+}
+
+static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
+{
+ gpir_alu_node *alu = gpir_node_to_alu(node);
+ gpir_node *child = alu->children[0];
+
+ gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
+ if (unlikely(!complex2))
+ return false;
+
+ complex2->children[0] = child;
+ complex2->num_child = 1;
+ gpir_node_add_dep(&complex2->node, child, GPIR_DEP_INPUT);
+ list_addtail(&complex2->node.list, &node->list);
+
+ int impl_op = 0;
+ switch (node->op) {
+ case gpir_op_rcp:
+ impl_op = gpir_op_rcp_impl;
+ break;
+ case gpir_op_rsqrt:
+ impl_op = gpir_op_rsqrt_impl;
+ break;
+ default:
+ assert(0);
+ }
+
+ gpir_alu_node *impl = gpir_node_create(block, impl_op);
+ if (unlikely(!impl))
+ return false;
+
+ impl->children[0] = child;
+ impl->num_child = 1;
+ gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
+ list_addtail(&impl->node.list, &node->list);
+
+ /* change node to complex1 node */
+ node->op = gpir_op_complex1;
+ alu->children[0] = &impl->node;
+ alu->children[1] = &complex2->node;
+ alu->children[2] = child;
+ alu->num_child = 3;
+ gpir_node_add_dep(node, &impl->node, GPIR_DEP_INPUT);
+ gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT);
+
+ return true;
+}
+
+static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp)
+{
+ list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+ list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
+ if (gpir_op_infos[node->op].may_consume_two_slots) {
+ /* dummy_f/m are auxiliary nodes for value reg alloc:
+ * 1. before reg alloc, create fake nodes dummy_f, dummy_m,
+ * so the tree become: (dummy_m (node dummy_f))
+ * dummy_m can be spilled, but other nodes in the tree can't
+ * be spilled.
+ * 2. After reg allocation and fake dep add, merge all deps of
+ * dummy_m and dummy_f to node and remove dummy_m & dummy_f
+ *
+ * We may also not use dummy_f/m, but alloc two value reg for
+ * node. But that means we need to make sure there're 2 free
+ * slot after the node successors, but we just need one slot
+ * after to be able to schedule it because we can use one move for
+ * the two slot node. It's also not easy to handle the spill case
+ * for the alloc 2 value method.
+ *
+ * With the dummy_f/m method, there's no such requirement, the
+ * node can be scheduled only when there's two slots for it,
+ * otherwise a move. And the node can be spilled with one reg.
+ */
+ gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m);
+ if (unlikely(!dummy_m))
+ return false;
+ list_add(&dummy_m->list, &node->list);
+
+ gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f);
+ if (unlikely(!dummy_f))
+ return false;
+ list_add(&dummy_f->list, &node->list);
+
+ gpir_alu_node *alu = gpir_node_to_alu(dummy_m);
+ alu->children[0] = node;
+ alu->children[1] = dummy_f;
+ alu->num_child = 2;
+
+ gpir_node_replace_succ(dummy_m, node);
+ gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT);
+ gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT);
+
+ }
+ }
+ }
+
+ return true;
+}
+
+/*
+ * There are no 'equal' or 'not-equal' opcodes.
+ * eq (a == b) is lowered to and(a >= b, b >= a)
+ * ne (a != b) is lowered to or(a < b, b < a)
+ */
+static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node)
+{
+ gpir_op cmp_node_op;
+ gpir_op node_new_op;
+ switch (node->op) {
+ case gpir_op_eq:
+ cmp_node_op = gpir_op_ge;
+ node_new_op = gpir_op_min; /* and */
+ break;
+ case gpir_op_ne:
+ cmp_node_op = gpir_op_lt;
+ node_new_op = gpir_op_max; /* or */
+ break;
+ default:
+ assert(0);
+ }
+
+ gpir_alu_node *e = gpir_node_to_alu(node);
+
+ gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op);
+ list_addtail(&cmp1->node.list, &node->list);
+ gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op);
+ list_addtail(&cmp2->node.list, &node->list);
+
+ cmp1->children[0] = e->children[0];
+ cmp1->children[1] = e->children[1];
+ cmp1->num_child = 2;
+
+ cmp2->children[0] = e->children[1];
+ cmp2->children[1] = e->children[0];
+ cmp2->num_child = 2;
+
+ gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT);
+ gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT);
+
+ gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT);
+ gpir_node_add_dep(&cmp2->node, e->children[1], GPIR_DEP_INPUT);
+
+ gpir_node_foreach_pred_safe(node, dep) {
+ gpir_node_remove_dep(node, dep->pred);
+ }
+
+ gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT);
+ gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT);
+
+ node->op = node_new_op;
+ e->children[0] = &cmp1->node;
+ e->children[1] = &cmp2->node;
+ e->num_child = 2;
+
+ return true;
+}
+
+/*
+ * There is no 'abs' opcode.
+ * abs(a) is lowered to max(a, -a)
+ */
+static bool gpir_lower_abs(gpir_block *block, gpir_node *node)
+{
+ gpir_alu_node *alu = gpir_node_to_alu(node);
+
+ assert(node->op == gpir_op_abs);
+
+ node->op = gpir_op_max;
+
+ alu->children[1] = alu->children[0];
+ alu->children_negate[1] = true;
+ alu->num_child = 2;
+
+ return true;
+}
+
+/*
+ * There is no 'not' opcode.
+ * not(a) is lowered to add(1, -a)
+ */
+static bool gpir_lower_not(gpir_block *block, gpir_node *node)
+{
+ gpir_alu_node *alu = gpir_node_to_alu(node);
+
+ assert(alu->node.op == gpir_op_not);
+
+ node->op = gpir_op_add;
+
+ gpir_node *node_const = gpir_node_create(block, gpir_op_const);
+ gpir_const_node *c = gpir_node_to_const(node_const);
+
+ assert(c->node.op == gpir_op_const);
+
+ list_addtail(&c->node.list, &node->list);
+ c->value.f = 1.0f;
+ gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT);
+
+ alu->children_negate[1] = !alu->children_negate[0];
+ alu->children[1] = alu->children[0];
+ alu->children[0] = &c->node;
+ alu->num_child = 2;
+
+ return true;
+}
+
+
+static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {
+ [gpir_op_not] = gpir_lower_not,
+};
+
+static bool (*gpir_post_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {
+ [gpir_op_neg] = gpir_lower_neg,
+ [gpir_op_rcp] = gpir_lower_complex,
+ [gpir_op_rsqrt] = gpir_lower_complex,
+ [gpir_op_eq] = gpir_lower_eq_ne,
+ [gpir_op_ne] = gpir_lower_eq_ne,
+ [gpir_op_abs] = gpir_lower_abs,
+};
+
+bool gpir_pre_rsched_lower_prog(gpir_compiler *comp)
+{
+ if (!gpir_lower_viewport_transform(comp))
+ return false;
+
+ list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+ list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
+ if (gpir_pre_rsched_lower_funcs[node->op] &&
+ !gpir_pre_rsched_lower_funcs[node->op](block, node))
+ return false;
+ }
+ }
+
+ if (!gpir_lower_const(comp))
+ return false;
+
+ if (!gpir_lower_load(comp))
+ return false;
+
+ gpir_debug("pre rsched lower prog\n");
+ gpir_node_print_prog_seq(comp);
+ return true;
+}
+
+bool gpir_post_rsched_lower_prog(gpir_compiler *comp)
+{
+ list_for_each_entry(gpir_block, block, &comp->block_list, list) {
+ list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
+ if (gpir_post_rsched_lower_funcs[node->op] &&
+ !gpir_post_rsched_lower_funcs[node->op](block, node))
+ return false;
+ }
+ }
+
+ if (!gpir_lower_node_may_consume_two_slots(comp))
+ return false;
+
+ gpir_debug("post rsched lower prog\n");
+ gpir_node_print_prog_seq(comp);
+ return true;
+}