From 9a1d063c6d679c2155f5eb80f1cb94368d36bf2c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 24 Nov 2010 22:02:26 -0800 Subject: glsl: Add an optimization pass to simplify discards. NOTE: This is a candidate for the 7.9 branch. --- src/glsl/ir_optimization.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/glsl/ir_optimization.h') diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index fa497a45551..05c1becc5eb 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -51,6 +51,7 @@ bool do_function_inlining(exec_list *instructions); bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false); bool do_lower_texture_projection(exec_list *instructions); bool do_if_simplification(exec_list *instructions); +bool do_discard_simplification(exec_list *instructions); bool do_if_to_cond_assign(exec_list *instructions); bool do_mat_op_to_vec(exec_list *instructions); bool do_mod_to_fract(exec_list *instructions); -- cgit v1.2.3 From 940df10100d740ef27fa39026fd51c3199ed3d62 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 25 Nov 2010 01:09:26 -0800 Subject: glsl: Add a lowering pass to move discards out of if-statements. This should allow lower_if_to_cond_assign to work in the presence of discards, fixing bug #31690 and likely #31983. NOTE: This is a candidate for the 7.9 branch. --- src/glsl/Makefile | 1 + src/glsl/SConscript | 1 + src/glsl/ir_optimization.h | 1 + src/glsl/lower_discard.cpp | 198 ++++++++++++++++++++++++++++++++++++++++ src/mesa/program/ir_to_mesa.cpp | 4 +- 5 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 src/glsl/lower_discard.cpp (limited to 'src/glsl/ir_optimization.h') diff --git a/src/glsl/Makefile b/src/glsl/Makefile index 83ecb7f9c0d..2674c6ec485 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -52,6 +52,7 @@ CXX_SOURCES = \ loop_analysis.cpp \ loop_controls.cpp \ loop_unroll.cpp \ + lower_discard.cpp \ lower_if_to_cond_assign.cpp \ lower_instructions.cpp \ lower_jumps.cpp \ diff --git a/src/glsl/SConscript b/src/glsl/SConscript index 16f02e94b12..b5b1728beef 100644 --- a/src/glsl/SConscript +++ b/src/glsl/SConscript @@ -49,6 +49,7 @@ sources = [ 'loop_analysis.cpp', 'loop_controls.cpp', 'loop_unroll.cpp', + 'lower_discard.cpp', 'lower_if_to_cond_assign.cpp', 'lower_instructions.cpp', 'lower_jumps.cpp', diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 05c1becc5eb..1048ff982e0 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -62,6 +62,7 @@ bool do_swizzle_swizzle(exec_list *instructions); bool do_tree_grafting(exec_list *instructions); bool do_vec_index_to_cond_assign(exec_list *instructions); bool do_vec_index_to_swizzle(exec_list *instructions); +bool lower_discard(exec_list *instructions); bool lower_instructions(exec_list *instructions, unsigned what_to_lower); bool lower_noise(exec_list *instructions); bool lower_variable_index_to_cond_assign(exec_list *instructions, diff --git a/src/glsl/lower_discard.cpp b/src/glsl/lower_discard.cpp new file mode 100644 index 00000000000..b95313df8c8 --- /dev/null +++ b/src/glsl/lower_discard.cpp @@ -0,0 +1,198 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_discard.cpp + * + * This pass moves discards out of if-statements. + * + * Case 1: The "then" branch contains a conditional discard: + * --------------------------------------------------------- + * + * if (cond1) { + * s1; + * discard cond2; + * s2; + * } else { + * s3; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * temp = cond2; + * s2; + * } else { + * s3; + * } + * discard temp; + * + * Case 2: The "else" branch contains a conditional discard: + * --------------------------------------------------------- + * + * if (cond1) { + * s1; + * } else { + * s2; + * discard cond2; + * s3; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * } else { + * s2; + * temp = cond2; + * s3; + * } + * discard temp; + * + * Case 3: Both branches contain a conditional discard: + * ---------------------------------------------------- + * + * if (cond1) { + * s1; + * discard cond2; + * s2; + * } else { + * s3; + * discard cond3; + * s4; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * temp = cond2; + * s2; + * } else { + * s3; + * temp = cond3; + * s4; + * } + * discard temp; + * + * If there are multiple conditional discards, we need only deal with one of + * them. Repeatedly applying this pass will take care of the others. + * + * Unconditional discards are treated as having a condition of "true". + */ + +#include "glsl_types.h" +#include "ir.h" + +class lower_discard_visitor : public ir_hierarchical_visitor { +public: + lower_discard_visitor() + { + this->progress = false; + } + + ir_visitor_status visit_leave(ir_if *); + + bool progress; +}; + + +bool +lower_discard(exec_list *instructions) +{ + lower_discard_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} + + +static ir_discard * +find_discard(exec_list &instructions) +{ + foreach_list(n, &instructions) { + ir_discard *ir = ((ir_instruction *) n)->as_discard(); + if (ir != NULL) + return ir; + } + return NULL; +} + + +static void +replace_discard(void *mem_ctx, ir_variable *var, ir_discard *ir) +{ + ir_rvalue *condition = ir->condition; + + /* For unconditional discards, use "true" as the condition. */ + if (condition == NULL) + condition = new(mem_ctx) ir_constant(true); + + ir_assignment *assignment = + new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(var), + condition, NULL); + + ir->replace_with(assignment); +} + + +ir_visitor_status +lower_discard_visitor::visit_leave(ir_if *ir) +{ + ir_discard *then_discard = find_discard(ir->then_instructions); + ir_discard *else_discard = find_discard(ir->else_instructions); + + if (then_discard == NULL && else_discard == NULL) + return visit_continue; + + void *mem_ctx = talloc_parent(ir); + + ir_variable *temp = new(mem_ctx) ir_variable(glsl_type::bool_type, + "discard_cond_temp", + ir_var_temporary); + ir_assignment *temp_initializer = + new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(temp), + new(mem_ctx) ir_constant(false), NULL); + + ir->insert_before(temp); + ir->insert_before(temp_initializer); + + if (then_discard != NULL) + replace_discard(mem_ctx, temp, then_discard); + + if (else_discard != NULL) + replace_discard(mem_ctx, temp, else_discard); + + ir_discard *discard = then_discard != NULL ? then_discard : else_discard; + discard->condition = new(mem_ctx) ir_dereference_variable(temp); + ir->insert_after(discard); + + this->progress = true; + + return visit_continue; +} diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 5dd602fd83f..d9d86b6c29f 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2858,8 +2858,10 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = lower_quadop_vector(ir, true) || progress; - if (options->EmitNoIfs) + if (options->EmitNoIfs) { + progress = lower_discard(ir) || progress; progress = do_if_to_cond_assign(ir) || progress; + } if (options->EmitNoNoise) progress = lower_noise(ir) || progress; -- cgit v1.2.3 From c4285be9a5bd1adaa89050989374b95a9a601cdc Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 24 Nov 2010 22:21:10 -0800 Subject: glsl: Lower ir_binop_pow to a sequence of EXP2 and LOG2 --- src/glsl/ir_optimization.h | 5 +++-- src/glsl/lower_instructions.cpp | 26 ++++++++++++++++++++++++++ src/mesa/main/mtypes.h | 1 + src/mesa/program/ir_to_mesa.cpp | 5 +++-- 4 files changed, 33 insertions(+), 4 deletions(-) (limited to 'src/glsl/ir_optimization.h') diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 1048ff982e0..f264265f4b1 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -32,8 +32,9 @@ #define SUB_TO_ADD_NEG 0x01 #define DIV_TO_MUL_RCP 0x02 #define EXP_TO_EXP2 0x04 -#define LOG_TO_LOG2 0x08 -#define MOD_TO_FRACT 0x10 +#define POW_TO_EXP2 0x08 +#define LOG_TO_LOG2 0x10 +#define MOD_TO_FRACT 0x20 bool do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iterations); diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 0d9374d73bd..a5f61f213d0 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -33,6 +33,7 @@ * - SUB_TO_ADD_NEG * - DIV_TO_MUL_RCP * - EXP_TO_EXP2 + * - POW_TO_EXP2 * - LOG_TO_LOG2 * - MOD_TO_FRACT * @@ -61,6 +62,11 @@ * do have base 2 versions, so this pass converts exp and log to exp2 * and log2 operations. * + * POW_TO_EXP2: + * ----------- + * Many older GPUs don't have an x**y instruction. For these GPUs, convert + * x**y to 2**(y * log2(x)). + * * MOD_TO_FRACT: * ------------- * Breaks an ir_unop_mod expression down to (op1 * fract(op0 / op1)) @@ -91,6 +97,7 @@ private: void div_to_mul_rcp(ir_expression *); void mod_to_fract(ir_expression *); void exp_to_exp2(ir_expression *); + void pow_to_exp2(ir_expression *); void log_to_log2(ir_expression *); }; @@ -180,6 +187,20 @@ lower_instructions_visitor::exp_to_exp2(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::pow_to_exp2(ir_expression *ir) +{ + ir_expression *const log2_x = + new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type, + ir->operands[0]); + + ir->operation = ir_unop_exp2; + ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type, + ir->operands[1], log2_x); + ir->operands[1] = NULL; + this->progress = true; +} + void lower_instructions_visitor::log_to_log2(ir_expression *ir) { @@ -254,6 +275,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) mod_to_fract(ir); break; + case ir_binop_pow: + if (lowering(POW_TO_EXP2)) + pow_to_exp2(ir); + break; + default: return visit_continue; } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 80c20e09d9a..82495714f21 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2197,6 +2197,7 @@ struct gl_shader_compiler_options GLboolean EmitNoCont; /**< Emit CONT opcode? */ GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */ GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */ + GLboolean EmitNoPow; /**< Emit POW opcodes? */ /** * \name Forms of indirect addressing the driver cannot do. diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index d9d86b6c29f..b274a961b28 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2849,8 +2849,9 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) /* Lowering */ do_mat_op_to_vec(ir); - lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 - | LOG_TO_LOG2); + lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 + | LOG_TO_LOG2 + | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; -- cgit v1.2.3