author     Marek Olšák <[email protected]>  2016-07-03 17:11:07 +0200
committer  Marek Olšák <[email protected]>  2016-11-15 20:23:39 +0100
commit     e33440070a54cd3e67953ee8410c0edb62643c47 (patch)
tree       ffd0121538e31407caa5330c904fc3c05c3cea24 /src
parent     83d9b8a6f6365bc1569cdc847672478ac643670d (diff)
glsl/lower_if: conditionally lower if-branches based on their size
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--  src/compiler/glsl/ir_optimization.h             2
-rw-r--r--  src/compiler/glsl/lower_if_to_cond_assign.cpp  55
2 files changed, 50 insertions, 7 deletions
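
For orientation before reading the diff, here is a minimal standalone sketch (not Mesa code; the struct and function names are illustrative) of the decision the patch introduces: an if-statement is flattened to conditional assignments either because nesting exceeds the hardware limit (max_depth), or because both branches are cheap, contain nothing the pass cannot handle, and contain no expensive operation such as a texture fetch.

#include <algorithm>

/* Per-branch information gathered by walking the then/else blocks,
 * analogous to what check_ir_node() collects in the patch below. */
struct branch_costs {
   unsigned then_cost;   /* expressions/dereferences seen in the then-block */
   unsigned else_cost;   /* expressions/dereferences seen in the else-block */
   bool unsupported_op;  /* inner control flow the pass cannot flatten */
   bool expensive_op;    /* e.g. a texture instruction */
};

/* Mirrors the checks added to visit_leave(ir_if *). */
static bool should_flatten(bool must_lower,          /* nesting depth > max_depth */
                           unsigned min_branch_cost, /* 0 disables the cost heuristic */
                           const branch_costs &c)
{
   if (!must_lower && min_branch_cost == 0)
      return false;  /* neither depth nor cost asks for lowering */
   if (c.unsupported_op)
      return false;  /* e.g. a call or loop inside: cannot be flattened */
   if (must_lower)
      return true;   /* the GPU cannot nest this deep, flattening is mandatory */
   if (c.expensive_op)
      return false;  /* keep a real branch around expensive ops */
   /* A branch whose cost reaches min_branch_cost is preserved. */
   return std::max(c.then_cost, c.else_cost) < min_branch_cost;
}

In the actual pass, must_lower corresponds to this->depth-- > this->max_depth, and the per-branch costs are counted per IR node type in check_ir_node().
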
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index e6e8318a3aa..0d6c4e6a66a 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -109,7 +109,7 @@ bool do_if_simplification(exec_list *instructions);
bool opt_flatten_nested_if_blocks(exec_list *instructions);
bool do_discard_simplification(exec_list *instructions);
bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
- unsigned max_depth = 0);
+ unsigned max_depth = 0, unsigned min_branch_cost = 0);
bool do_mat_op_to_vec(exec_list *instructions);
bool do_minmax_prune(exec_list *instructions);
bool do_noop_swizzle(exec_list *instructions);
diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp
index e8db7aa9b7f..ae048be0d27 100644
--- a/src/compiler/glsl/lower_if_to_cond_assign.cpp
+++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp
@@ -24,8 +24,14 @@
/**
* \file lower_if_to_cond_assign.cpp
*
- * This attempts to flatten if-statements to conditional assignments for
- * GPUs with limited or no flow control support.
+ * This flattens if-statements to conditional assignments if:
+ *
+ * - the GPU has limited or no flow control support
+ * (controlled by max_depth)
+ *
+ * - small conditional branches are more expensive than conditional assignments
+ * (controlled by min_branch_cost, that's the cost for a branch to be
+ * preserved)
*
* It can't handle other control flow being inside of its block, such
* as calls or loops. Hopefully loop unrolling and inlining will take
@@ -49,17 +55,20 @@
#include "ir.h"
#include "util/set.h"
#include "util/hash_table.h" /* Needed for the hashing functions */
+#include "main/macros.h" /* for MAX2 */
namespace {
class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
public:
ir_if_to_cond_assign_visitor(gl_shader_stage stage,
- unsigned max_depth)
+ unsigned max_depth,
+ unsigned min_branch_cost)
{
this->progress = false;
this->stage = stage;
this->max_depth = max_depth;
+ this->min_branch_cost = min_branch_cost;
this->depth = 0;
this->condition_variables =
@@ -76,8 +85,13 @@ public:
ir_visitor_status visit_leave(ir_if *);
bool found_unsupported_op;
+ bool found_expensive_op;
+ bool is_then;
bool progress;
gl_shader_stage stage;
+ unsigned then_cost;
+ unsigned else_cost;
+ unsigned min_branch_cost;
unsigned max_depth;
unsigned depth;
@@ -88,12 +102,12 @@ public:
bool
lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
- unsigned max_depth)
+ unsigned max_depth, unsigned min_branch_cost)
{
if (max_depth == UINT_MAX)
return false;
- ir_if_to_cond_assign_visitor v(stage, max_depth);
+ ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
visit_list_elements(&v, instructions);
@@ -129,6 +143,20 @@ check_ir_node(ir_instruction *ir, void *data)
break;
}
+ /* SSBO, images, atomic counters are handled by ir_type_call */
+ case ir_type_texture:
+ v->found_expensive_op = true;
+ break;
+
+ case ir_type_expression:
+ case ir_type_dereference_array:
+ case ir_type_dereference_record:
+ if (v->is_then)
+ v->then_cost++;
+ else
+ v->else_cost++;
+ break;
+
default:
break;
}
@@ -193,24 +221,39 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
ir_visitor_status
ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
{
+ bool must_lower = this->depth-- > this->max_depth;
+
/* Only flatten when beyond the GPU's maximum supported nesting depth. */
- if (this->depth-- <= this->max_depth)
+ if (!must_lower && this->min_branch_cost == 0)
return visit_continue;
this->found_unsupported_op = false;
+ this->found_expensive_op = false;
+ this->then_cost = 0;
+ this->else_cost = 0;
ir_assignment *assign;
/* Check that both blocks don't contain anything we can't support. */
+ this->is_then = true;
foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
visit_tree(then_ir, check_ir_node, this);
}
+
+ this->is_then = false;
foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
visit_tree(else_ir, check_ir_node, this);
}
+
if (this->found_unsupported_op)
return visit_continue; /* can't handle inner unsupported opcodes */
+ /* Skip if the branch cost is high enough or if there's an expensive op. */
+ if (!must_lower &&
+ (this->found_expensive_op ||
+ MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
+ return visit_continue;
+
void *mem_ctx = ralloc_parent(ir);
/* Store the condition to a variable. Move all of the instructions from