diff options
-rw-r--r-- | src/glsl/Makefile.sources | 1 | ||||
-rw-r--r-- | src/glsl/glsl_parser_extras.cpp | 1 | ||||
-rw-r--r-- | src/glsl/ir_optimization.h | 1 | ||||
-rw-r--r-- | src/glsl/opt_minmax.cpp | 475 |
4 files changed, 478 insertions, 0 deletions
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index bfb6993536e..0c55327d608 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -96,6 +96,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/opt_flip_matrices.cpp \ $(GLSL_SRCDIR)/opt_function_inlining.cpp \ $(GLSL_SRCDIR)/opt_if_simplification.cpp \ + $(GLSL_SRCDIR)/opt_minmax.cpp \ $(GLSL_SRCDIR)/opt_noop_swizzle.cpp \ $(GLSL_SRCDIR)/opt_rebalance_tree.cpp \ $(GLSL_SRCDIR)/opt_redundant_jumps.cpp \ diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index cc7d2d746ff..79f849465c3 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -1613,6 +1613,7 @@ do_common_optimization(exec_list *ir, bool linked, else progress = do_constant_variable_unlinked(ir) || progress; progress = do_constant_folding(ir) || progress; + progress = do_minmax_prune(ir) || progress; progress = do_cse(ir) || progress; progress = do_rebalance_tree(ir) || progress; progress = do_algebraic(ir, native_integers, options) || progress; diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 0c3a6383190..e25857ac579 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -99,6 +99,7 @@ bool opt_flatten_nested_if_blocks(exec_list *instructions); bool do_discard_simplification(exec_list *instructions); bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0); bool do_mat_op_to_vec(exec_list *instructions); +bool do_minmax_prune(exec_list *instructions); bool do_noop_swizzle(exec_list *instructions); bool do_structure_splitting(exec_list *instructions); bool do_swizzle_swizzle(exec_list *instructions); diff --git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp new file mode 100644 index 00000000000..32fb2d7ea2a --- /dev/null +++ b/src/glsl/opt_minmax.cpp @@ -0,0 +1,475 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_minmax.cpp + * + * Drop operands from an expression tree of only min/max operations if they + * can be proven to not contribute to the final result. + * + * The algorithm is similar to alpha-beta pruning on a minmax search. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" +#include "ir_builder.h" +#include "program/prog_instruction.h" +#include "glsl_types.h" +#include "main/macros.h" + +using namespace ir_builder; + +namespace { + +enum compare_components_result { + LESS, + LESS_OR_EQUAL, + EQUAL, + GREATER_OR_EQUAL, + GREATER, + MIXED +}; + +class minmax_range { +public: + minmax_range(ir_constant *low = NULL, ir_constant *high = NULL) + { + this->low = low; + this->high = high; + } + + /* low is the lower limit of the range, high is the higher limit. NULL on + * low means negative infinity (unlimited) and on high positive infinity + * (unlimited). Because of the two interpretations of the value NULL, + * arbitrary comparison between ir_constants is impossible. + */ + ir_constant *low; + ir_constant *high; +}; + +class ir_minmax_visitor : public ir_rvalue_enter_visitor { +public: + ir_minmax_visitor() + : progress(false) + { + } + + ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange); + + void handle_rvalue(ir_rvalue **rvalue); + + bool progress; +}; + +/* + * Returns LESS if all vector components of `a' are strictly lower than of `b', + * GREATER if all vector components of `a' are strictly greater than of `b', + * MIXED if some vector components of `a' are strictly lower than of `b' while + * others are strictly greater, or EQUAL otherwise. + */ +static enum compare_components_result +compare_components(ir_constant *a, ir_constant *b) +{ + assert(a != NULL); + assert(b != NULL); + + assert(a->type->base_type == b->type->base_type); + + unsigned a_inc = a->type->is_scalar() ? 0 : 1; + unsigned b_inc = b->type->is_scalar() ? 0 : 1; + unsigned components = MAX2(a->type->components(), b->type->components()); + + bool foundless = false; + bool foundgreater = false; + bool foundequal = false; + + for (unsigned i = 0, c0 = 0, c1 = 0; + i < components; + c0 += a_inc, c1 += b_inc, ++i) { + switch (a->type->base_type) { + case GLSL_TYPE_UINT: + if (a->value.u[c0] < b->value.u[c1]) + foundless = true; + else if (a->value.u[c0] > b->value.u[c1]) + foundgreater = true; + else + foundequal = true; + break; + case GLSL_TYPE_INT: + if (a->value.i[c0] < b->value.i[c1]) + foundless = true; + else if (a->value.i[c0] > b->value.i[c1]) + foundgreater = true; + else + foundequal = true; + break; + case GLSL_TYPE_FLOAT: + if (a->value.f[c0] < b->value.f[c1]) + foundless = true; + else if (a->value.f[c0] > b->value.f[c1]) + foundgreater = true; + else + foundequal = true; + break; + default: + unreachable("not reached"); + } + } + + if (foundless && foundgreater) { + /* Some components are strictly lower, others are strictly greater */ + return MIXED; + } + + if (foundequal) { + /* It is not mixed, but it is not strictly lower or greater */ + if (foundless) + return LESS_OR_EQUAL; + if (foundgreater) + return GREATER_OR_EQUAL; + return EQUAL; + } + + /* All components are strictly lower or strictly greater */ + return foundless ? LESS : GREATER; +} + +static ir_constant * +combine_constant(bool ismin, ir_constant *a, ir_constant *b) +{ + void *mem_ctx = ralloc_parent(a); + ir_constant *c = a->clone(mem_ctx, NULL); + for (unsigned i = 0; i < c->type->components(); i++) { + switch (c->type->base_type) { + case GLSL_TYPE_UINT: + if ((ismin && b->value.u[i] < c->value.u[i]) || + (!ismin && b->value.u[i] > c->value.u[i])) + c->value.u[i] = b->value.u[i]; + break; + case GLSL_TYPE_INT: + if ((ismin && b->value.i[i] < c->value.i[i]) || + (!ismin && b->value.i[i] > c->value.i[i])) + c->value.i[i] = b->value.i[i]; + break; + case GLSL_TYPE_FLOAT: + if ((ismin && b->value.f[i] < c->value.f[i]) || + (!ismin && b->value.f[i] > c->value.f[i])) + c->value.f[i] = b->value.f[i]; + break; + default: + assert(!"not reached"); + } + } + return c; +} + +static ir_constant * +smaller_constant(ir_constant *a, ir_constant *b) +{ + assert(a != NULL); + assert(b != NULL); + + enum compare_components_result ret = compare_components(a, b); + if (ret == MIXED) + return combine_constant(true, a, b); + else if (ret < EQUAL) + return a; + else + return b; +} + +static ir_constant * +larger_constant(ir_constant *a, ir_constant *b) +{ + assert(a != NULL); + assert(b != NULL); + + enum compare_components_result ret = compare_components(a, b); + if (ret == MIXED) + return combine_constant(false, a, b); + else if (ret < EQUAL) + return b; + else + return a; +} + +/* Combines two ranges by doing an element-wise min() / max() depending on the + * operation. + */ +static minmax_range +combine_range(minmax_range r0, minmax_range r1, bool ismin) +{ + minmax_range ret; + + if (!r0.low) { + ret.low = ismin ? r0.low : r1.low; + } else if (!r1.low) { + ret.low = ismin ? r1.low : r0.low; + } else { + ret.low = ismin ? smaller_constant(r0.low, r1.low) : + larger_constant(r0.low, r1.low); + } + + if (!r0.high) { + ret.high = ismin ? r1.high : r0.high; + } else if (!r1.high) { + ret.high = ismin ? r0.high : r1.high; + } else { + ret.high = ismin ? smaller_constant(r0.high, r1.high) : + larger_constant(r0.high, r1.high); + } + + return ret; +} + +/* Returns a range so that lower limit is the larger of the two lower limits, + * and higher limit is the smaller of the two higher limits. + */ +static minmax_range +range_intersection(minmax_range r0, minmax_range r1) +{ + minmax_range ret; + + if (!r0.low) + ret.low = r1.low; + else if (!r1.low) + ret.low = r0.low; + else + ret.low = larger_constant(r0.low, r1.low); + + if (!r0.high) + ret.high = r1.high; + else if (!r1.high) + ret.high = r0.high; + else + ret.high = smaller_constant(r0.high, r1.high); + + return ret; +} + +static minmax_range +get_range(ir_rvalue *rval) +{ + ir_expression *expr = rval->as_expression(); + if (expr && (expr->operation == ir_binop_min || + expr->operation == ir_binop_max)) { + minmax_range r0 = get_range(expr->operands[0]); + minmax_range r1 = get_range(expr->operands[1]); + return combine_range(r0, r1, expr->operation == ir_binop_min); + } + + ir_constant *c = rval->as_constant(); + if (c) { + return minmax_range(c, c); + } + + return minmax_range(); +} + +/** + * Prunes a min/max expression considering the base range of the parent + * min/max expression. + * + * @param baserange the range that the parents of this min/max expression + * in the min/max tree will clamp its value to. + */ +ir_rvalue * +ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange) +{ + assert(expr->operation == ir_binop_min || + expr->operation == ir_binop_max); + + bool ismin = expr->operation == ir_binop_min; + minmax_range limits[2]; + + /* Recurse to get the ranges for each of the subtrees of this + * expression. We need to do this as a separate step because we need to + * know the ranges of each of the subtrees before we prune either one. + * Consider something like this: + * + * max + * / \ + * max max + * / \ / \ + * 3 a b 2 + * + * We would like to prune away the max on the bottom-right, but to do so + * we need to know the range of the expression on the left beforehand, + * and there's no guarantee that we will visit either subtree in a + * particular order. + */ + for (unsigned i = 0; i < 2; ++i) + limits[i] = get_range(expr->operands[i]); + + for (unsigned i = 0; i < 2; ++i) { + bool is_redundant = false; + + enum compare_components_result cr = LESS; + if (ismin) { + /* If this operand will always be greater than the other one, it's + * redundant. + */ + if (limits[i].low && limits[1 - i].high) { + cr = compare_components(limits[i].low, limits[1 - i].high); + if (cr >= EQUAL && cr != MIXED) + is_redundant = true; + } + /* If this operand is always greater than baserange, then even if + * it's smaller than the other one it'll get clamped, so it's + * redundant. + */ + if (!is_redundant && limits[i].low && baserange.high) { + cr = compare_components(limits[i].low, baserange.high); + if (cr >= EQUAL && cr != MIXED) + is_redundant = true; + } + } else { + /* If this operand will always be lower than the other one, it's + * redundant. + */ + if (limits[i].high && limits[1 - i].low) { + cr = compare_components(limits[i].high, limits[1 - i].low); + if (cr <= EQUAL) + is_redundant = true; + } + /* If this operand is always lower than baserange, then even if + * it's greater than the other one it'll get clamped, so it's + * redundant. + */ + if (!is_redundant && limits[i].high && baserange.low) { + cr = compare_components(limits[i].high, baserange.low); + if (cr <= EQUAL) + is_redundant = true; + } + } + + if (is_redundant) { + progress = true; + + /* Recurse if necessary. */ + ir_expression *op_expr = expr->operands[1 - i]->as_expression(); + if (op_expr && (op_expr->operation == ir_binop_min || + op_expr->operation == ir_binop_max)) { + return prune_expression(op_expr, baserange); + } + + return expr->operands[1 - i]; + } else if (cr == MIXED) { + /* If we have mixed vector operands, we can try to resolve the minmax + * expression by doing a component-wise minmax: + * + * min min + * / \ / \ + * min a ===> [1,1] a + * / \ + * [1,3] [3,1] + * + */ + ir_constant *a = expr->operands[0]->as_constant(); + ir_constant *b = expr->operands[1]->as_constant(); + if (a && b) + return combine_constant(ismin, a, b); + } + } + + /* Now recurse to operands giving them the proper baserange. The baserange + * to pass is the intersection of our baserange and the other operand's + * limit with one of the ranges unlimited. If we can't compute a valid + * intersection, we use the current baserange. + */ + for (unsigned i = 0; i < 2; ++i) { + ir_expression *op_expr = expr->operands[i]->as_expression(); + if (op_expr && (op_expr->operation == ir_binop_min || + op_expr->operation == ir_binop_max)) { + /* We can only compute a new baserange for this operand if we managed + * to compute a valid range for the other operand. + */ + if (ismin) + limits[1 - i].low = NULL; + else + limits[1 - i].high = NULL; + minmax_range base = range_intersection(limits[1 - i], baserange); + expr->operands[i] = prune_expression(op_expr, base); + } + } + + /* If we got here we could not discard any of the operands of the minmax + * expression, but we can still try to resolve the expression if both + * operands are constant. We do this after the loop above, to make sure + * that if our operands are minmax expressions we have tried to prune them + * first (hopefully reducing them to constants). + */ + ir_constant *a = expr->operands[0]->as_constant(); + ir_constant *b = expr->operands[1]->as_constant(); + if (a && b) + return combine_constant(ismin, a, b); + + return expr; +} + +static ir_rvalue * +swizzle_if_required(ir_expression *expr, ir_rvalue *rval) +{ + if (expr->type->is_vector() && rval->type->is_scalar()) { + return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements); + } else { + return rval; + } +} + +void +ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr || (expr->operation != ir_binop_min && + expr->operation != ir_binop_max)) + return; + + ir_rvalue *new_rvalue = prune_expression(expr, minmax_range()); + if (new_rvalue == *rvalue) + return; + + /* If the expression type is a vector and the optimization leaves a scalar + * as the result, we need to turn it into a vector. + */ + *rvalue = swizzle_if_required(expr, new_rvalue); + + progress = true; +} + +} + +bool +do_minmax_prune(exec_list *instructions) +{ + ir_minmax_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} |