author     Ian Romanick <[email protected]>    2016-10-14 18:17:16 -0700
committer  Ian Romanick <[email protected]>    2017-01-20 15:41:23 -0800
commit     6c3af043633997633d03e5409939263162076e81 (patch)
tree       f9131a40077f0e3315cf473f2bdda5e86fd256b5 /src/compiler/glsl/lower_int64.cpp
parent     330fc2413c61f0bd9c7bb9f3a0ecd91b09de267a (diff)
glsl: Add a lowering pass for 64-bit integer multiplication
v2: Rename lower_64bit.cpp and lower_64bit_test.cpp to lower_int64.
Suggested by Matt.

Signed-off-by: Ian Romanick <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/compiler/glsl/lower_int64.cpp')
-rw-r--r--  src/compiler/glsl/lower_int64.cpp  374
1 file changed, 374 insertions, 0 deletions
diff --git a/src/compiler/glsl/lower_int64.cpp b/src/compiler/glsl/lower_int64.cpp
new file mode 100644
index 00000000000..5952de51c2e
--- /dev/null
+++ b/src/compiler/glsl/lower_int64.cpp
@@ -0,0 +1,374 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_int64.cpp
+ *
+ * Lower 64-bit operations to 32-bit operations. Each 64-bit value is lowered
+ * to a uvec2. For each operation that can be lowered, there is a function
+ * called __builtin_foo with the same number of parameters that takes uvec2
+ * sources and produces uvec2 results. An operation like
+ *
+ * uint64_t(x) * uint64_t(y)
+ *
+ * becomes
+ *
+ * packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
+ */
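
The generator behind __builtin_umul64 is not part of this file. As a rough
illustration of the arithmetic such a helper has to perform using only 32-bit
operations, here is a minimal C++ sketch; the lo/hi split follows GLSL's
unpackUint2x32 convention (.x holds the least significant word), and all names
are illustrative stand-ins, not Mesa IR.

    /* Minimal sketch of a 64-bit multiply built from 32-bit pieces.
     * Assumes lo = bits 0..31 and hi = bits 32..63 of each operand.
     */
    #include <cstdint>

    struct lowered_u64 { uint32_t lo, hi; };   /* stand-in for a lowered uvec2 */

    static lowered_u64
    umul64_sketch(lowered_u64 a, lowered_u64 b)
    {
       /* Low 32x32 -> 64 product gives the low result word plus a carry
        * into the high word.
        */
       const uint64_t lo_product = (uint64_t) a.lo * b.lo;

       lowered_u64 r;
       r.lo = (uint32_t) lo_product;

       /* The cross terms only affect the high 32 bits; anything carrying
        * past bit 63 is discarded, matching 64-bit wraparound.
        */
       r.hi = (uint32_t) (lo_product >> 32) + a.lo * b.hi + a.hi * b.lo;

       return r;
    }

With a = unpackUint2x32(x) and b = unpackUint2x32(y), packing the result with
packUint2x32 reproduces uint64_t(x) * uint64_t(y).
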
+
+#include "main/macros.h"
+#include "compiler/glsl_types.h"
+#include "ir.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_builder.h"
+#include "ir_optimization.h"
+#include "util/hash_table.h"
+#include "builtin_functions.h"
+
+typedef ir_function_signature *(*function_generator)(void *mem_ctx,
+ builtin_available_predicate avail);
+
+using namespace ir_builder;
+
+namespace lower_64bit {
+void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);
+
+ir_dereference_variable *compact_destination(ir_factory &,
+ const glsl_type *type,
+ ir_variable *result[4]);
+
+ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
+ ir_expression *ir,
+ ir_function_signature *callee);
+};
+
+using namespace lower_64bit;
+
+namespace {
+
+class lower_64bit_visitor : public ir_rvalue_visitor {
+public:
+ lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
+ : progress(false), lower(lower), instructions(instructions),
+ function_list(), added_functions(&function_list, mem_ctx)
+ {
+ functions = _mesa_hash_table_create(mem_ctx,
+ _mesa_key_hash_string,
+ _mesa_key_string_equal);
+
+ foreach_in_list(ir_instruction, node, instructions) {
+ ir_function *const f = node->as_function();
+
+ if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
+ continue;
+
+ add_function(f);
+ }
+ }
+
+ ~lower_64bit_visitor()
+ {
+ _mesa_hash_table_destroy(functions, NULL);
+ }
+
+ void handle_rvalue(ir_rvalue **rvalue);
+
+ void add_function(ir_function *f)
+ {
+ _mesa_hash_table_insert(functions, f->name, f);
+ }
+
+ ir_function *find_function(const char *name)
+ {
+ struct hash_entry *const entry =
+ _mesa_hash_table_search(functions, name);
+
+ return entry != NULL ? (ir_function *) entry->data : NULL;
+ }
+
+ bool progress;
+
+private:
+ unsigned lower; /** Bitfield of which operations to lower */
+
+ exec_list *instructions;
+
+ /** Hashtable containing all of the known functions in the IR */
+ struct hash_table *functions;
+
+public:
+ exec_list function_list;
+
+private:
+ ir_factory added_functions;
+
+ ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
+ function_generator generator);
+};
+
+} /* anonymous namespace */
+
+static bool
+is_integer_64(const glsl_type *t)
+{
+ return t->base_type == GLSL_TYPE_UINT64 || t->base_type == GLSL_TYPE_INT64;
+}
+
+/**
+ * Determine if a particular type of lowering should occur
+ */
+#define lowering(x) (this->lower & x)
+
+bool
+lower_64bit_integer_instructions(exec_list *instructions,
+ unsigned what_to_lower)
+{
+ if (instructions->is_empty())
+ return false;
+
+ ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
+ void *const mem_ctx = ralloc_parent(first_inst);
+ lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);
+
+ visit_list_elements(&v, instructions);
+
+ if (v.progress && !v.function_list.is_empty()) {
+ /* Move all of the nodes from function_list to the head of the incoming
+ * instruction list.
+ */
+ exec_node *const after = &instructions->head_sentinel;
+ exec_node *const before = instructions->head_sentinel.next;
+ exec_node *const head = v.function_list.head_sentinel.next;
+ exec_node *const tail = v.function_list.tail_sentinel.prev;
+
+ before->next = head;
+ head->prev = before;
+
+ after->prev = tail;
+ tail->next = after;
+ }
+
+ return v.progress;
+}
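
For orientation, a hypothetical call site for this entry point might look like
the sketch below. MUL64 is the flag tested later in this file; it and the
prototype are assumed to be visible through ir_optimization.h, which this file
already includes, and every other name here is illustrative.

    #include "ir_optimization.h"   /* assumed to declare the pass and MUL64 */

    static void
    lower_shader_int64_sketch(exec_list *ir)
    {
       if (lower_64bit_integer_instructions(ir, MUL64)) {
          /* The pass prepended whatever __builtin_* helper functions it
           * generated, so a follow-up inlining/cleanup pass would
           * typically run next.
           */
       }
    }
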
+
+
+/**
+ * Expand individual 64-bit values to uvec2 values
+ *
+ * Each operation is in one of a few forms.
+ *
+ * vector op vector
+ * vector op scalar
+ * scalar op vector
+ * scalar op scalar
+ *
+ * In the 'vector op vector' case, the two vectors must have the same size.
+ * In a way, the 'scalar op scalar' form is a special case of the 'vector op
+ * vector' form.
+ *
+ * This function generates a new set of uvec2 values for each element of a
+ * single operand. If the operand is a scalar, the uvec2 is replicated
+ * multiple times. A value like
+ *
+ * u64vec3(a) + u64vec3(b)
+ *
+ * becomes
+ *
+ * u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
+ * uvec2 tmp1 = unpackUint2x32(tmp0.x);
+ * uvec2 tmp2 = unpackUint2x32(tmp0.y);
+ * uvec2 tmp3 = unpackUint2x32(tmp0.z);
+ *
+ * and the returned operands array contains ir_variable pointers to
+ *
+ * { tmp1, tmp2, tmp3, tmp1 }
+ */
+void
+lower_64bit::expand_source(ir_factory &body,
+ ir_rvalue *val,
+ ir_variable **expanded_src)
+{
+ assert(val->type->base_type == GLSL_TYPE_UINT64 ||
+ val->type->base_type == GLSL_TYPE_INT64);
+
+ ir_variable *const temp = body.make_temp(val->type, "tmp");
+
+ body.emit(assign(temp, val));
+
+ const ir_expression_operation unpack_opcode =
+ val->type->base_type == GLSL_TYPE_UINT64
+ ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;
+
+ const glsl_type *const type =
+ val->type->base_type == GLSL_TYPE_UINT64
+ ? glsl_type::uvec2_type : glsl_type::ivec2_type;
+
+ unsigned i;
+ for (i = 0; i < val->type->vector_elements; i++) {
+ expanded_src[i] = body.make_temp(type, "expanded_64bit_source");
+
+ body.emit(assign(expanded_src[i],
+ expr(unpack_opcode, swizzle(temp, i, 1))));
+ }
+
+ for (/* empty */; i < 4; i++)
+ expanded_src[i] = expanded_src[0];
+}
+
+/**
+ * Convert a series of uvec2 results into a single 64-bit integer vector
+ */
+ir_dereference_variable *
+lower_64bit::compact_destination(ir_factory &body,
+ const glsl_type *type,
+ ir_variable *result[4])
+{
+ const ir_expression_operation pack_opcode =
+ type->base_type == GLSL_TYPE_UINT64
+ ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;
+
+ ir_variable *const compacted_result =
+ body.make_temp(type, "compacted_64bit_result");
+
+ for (unsigned i = 0; i < type->vector_elements; i++) {
+ body.emit(assign(compacted_result,
+ expr(pack_opcode, result[i]),
+ 1U << i));
+ }
+
+ void *const mem_ctx = ralloc_parent(compacted_result);
+ return new(mem_ctx) ir_dereference_variable(compacted_result);
+}
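
The 1U << i argument above is the assignment write mask: bit i selects
component i of the 64-bit destination, so each packed uvec2 lands in exactly
one channel. A simplified C++ model of that single-bit masking (not Mesa IR):

    #include <cstdint>

    /* Writes src into every destination component whose bit is set in the
     * write mask; compact_destination() only ever sets a single bit.
     */
    static void
    masked_store_sketch(uint64_t dst[4], uint64_t src, unsigned writemask)
    {
       for (unsigned i = 0; i < 4; i++) {
          if (writemask & (1u << i))
             dst[i] = src;
       }
    }
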
+
+ir_rvalue *
+lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
+ ir_expression *ir,
+ ir_function_signature *callee)
+{
+ const unsigned num_operands = ir->get_num_operands();
+ ir_variable *src[4][4];
+ ir_variable *dst[4];
+ void *const mem_ctx = ralloc_parent(ir);
+ exec_list instructions;
+ unsigned source_components = 0;
+ const glsl_type *const result_type =
+ ir->type->base_type == GLSL_TYPE_UINT64
+ ? glsl_type::uvec2_type : glsl_type::ivec2_type;
+
+ ir_factory body(&instructions, mem_ctx);
+
+ for (unsigned i = 0; i < num_operands; i++) {
+ expand_source(body, ir->operands[i], src[i]);
+
+ if (ir->operands[i]->type->vector_elements > source_components)
+ source_components = ir->operands[i]->type->vector_elements;
+ }
+
+ for (unsigned i = 0; i < source_components; i++) {
+ dst[i] = body.make_temp(result_type, "expanded_64bit_result");
+
+ exec_list parameters;
+
+ for (unsigned j = 0; j < num_operands; j++)
+ parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));
+
+ ir_dereference_variable *const return_deref =
+ new(mem_ctx) ir_dereference_variable(dst[i]);
+
+ ir_call *const c = new(mem_ctx) ir_call(callee,
+ return_deref,
+ &parameters);
+
+ body.emit(c);
+ }
+
+ ir_rvalue *const rv = compact_destination(body, ir->type, dst);
+
+ /* Splice all of the nodes from the local instruction list into the IR
+ * stream, between the instruction that precedes base_ir and base_ir itself.
+ */
+ exec_node *const after = base_ir;
+ exec_node *const before = after->prev;
+ exec_node *const head = instructions.head_sentinel.next;
+ exec_node *const tail = instructions.tail_sentinel.prev;
+
+ before->next = head;
+ head->prev = before;
+
+ after->prev = tail;
+ tail->next = after;
+
+ return rv;
+}
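
Putting expand_source(), the per-component calls, and compact_destination()
together, the net effect for a u64vec2 multiply can be pictured with the
self-contained C++ analogy below. unpack_sketch/pack_sketch mirror
unpackUint2x32/packUint2x32, and the plain 64-bit multiply stands in for the
emitted call to __builtin_umul64; none of these names are Mesa IR.

    #include <cstdint>

    struct u32pair { uint32_t lo, hi; };              /* a lowered uvec2 */

    static u32pair  unpack_sketch(uint64_t v) { return { (uint32_t) v, (uint32_t) (v >> 32) }; }
    static uint64_t pack_sketch(u32pair v)    { return (uint64_t) v.hi << 32 | v.lo; }

    static void
    mul_u64vec2_sketch(const uint64_t a[2], const uint64_t b[2], uint64_t dst[2])
    {
       for (unsigned i = 0; i < 2; i++) {
          /* expand_source(): each operand component becomes a uvec2 */
          const u32pair a_i = unpack_sketch(a[i]);
          const u32pair b_i = unpack_sketch(b[i]);

          /* one ir_call to the scalar builtin per component (stand-in) */
          const uint64_t product = pack_sketch(a_i) * pack_sketch(b_i);

          /* compact_destination(): pack the uvec2 result back into dst */
          dst[i] = pack_sketch(unpack_sketch(product));
       }
    }
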
+
+ir_rvalue *
+lower_64bit_visitor::handle_op(ir_expression *ir,
+ const char *function_name,
+ function_generator generator)
+{
+ for (unsigned i = 0; i < ir->get_num_operands(); i++)
+ if (!is_integer_64(ir->operands[i]->type))
+ return ir;
+
+ /* Get a handle to the correct ir_function_signature for the core
+ * operation.
+ */
+ ir_function_signature *callee = NULL;
+ ir_function *f = find_function(function_name);
+
+ if (f != NULL) {
+ callee = (ir_function_signature *) f->signatures.get_head();
+ assert(callee != NULL && callee->ir_type == ir_type_function_signature);
+ } else {
+ f = new(base_ir) ir_function(function_name);
+ callee = generator(base_ir, NULL);
+
+ f->add_signature(callee);
+
+ add_function(f);
+ }
+
+ return lower_op_to_function_call(this->base_ir, ir, callee);
+}
+
+void
+lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+ if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
+ return;
+
+ ir_expression *const ir = (*rvalue)->as_expression();
+ assert(ir != NULL);
+
+ switch (ir->operation) {
+ case ir_binop_mul:
+ if (lowering(MUL64)) {
+ *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
+ this->progress = true;
+ }
+ break;
+
+ default:
+ break;
+ }
+}
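
Only ir_binop_mul is wired up in this commit. As a sketch of how another
operation could hook into the same switch, the fragment below assumes
hypothetical DIV64, __builtin_udiv64/__builtin_idiv64 names and matching
generate_ir entry points that are not part of this change.

    /* Hypothetical extension of the switch in handle_rvalue(); every
     * division-related name here is an assumption, not part of this commit.
     */
    case ir_binop_div:
       if (lowering(DIV64)) {
          if (ir->type->base_type == GLSL_TYPE_UINT64)
             *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
          else
             *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
          this->progress = true;
       }
       break;
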