diff options
-rw-r--r-- | src/glsl/ir_optimization.h | 13 | ||||
-rw-r--r-- | src/glsl/lower_instructions.cpp | 125 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 1 | ||||
-rw-r--r-- | src/mesa/program/ir_to_mesa.cpp | 2 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 |
5 files changed, 80 insertions, 63 deletions
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index f7808bdda9a..48448d4a16a 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -29,12 +29,13 @@ */ /* Operations for lower_instructions() */ -#define SUB_TO_ADD_NEG 0x01 -#define DIV_TO_MUL_RCP 0x02 -#define EXP_TO_EXP2 0x04 -#define POW_TO_EXP2 0x08 -#define LOG_TO_LOG2 0x10 -#define MOD_TO_FRACT 0x20 +#define SUB_TO_ADD_NEG 0x01 +#define DIV_TO_MUL_RCP 0x02 +#define EXP_TO_EXP2 0x04 +#define POW_TO_EXP2 0x08 +#define LOG_TO_LOG2 0x10 +#define MOD_TO_FRACT 0x20 +#define INT_DIV_TO_MUL_RCP 0x40 bool do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iterations); diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 23aa19bde6f..d79eb0a7ffb 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -32,6 +32,7 @@ * Currently supported transformations: * - SUB_TO_ADD_NEG * - DIV_TO_MUL_RCP + * - INT_DIV_TO_MUL_RCP * - EXP_TO_EXP2 * - POW_TO_EXP2 * - LOG_TO_LOG2 @@ -47,8 +48,8 @@ * want to recognize add(op0, neg(op1)) or the other way around to * produce a subtract anyway. * - * DIV_TO_MUL_RCP: - * --------------- + * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP: + * -------------------------------------- * Breaks an ir_unop_div expression down to op0 * (rcp(op1)). * * Many GPUs don't have a divide instruction (945 and 965 included), @@ -56,6 +57,10 @@ * reciprocal. By breaking the operation down, constant reciprocals * can get constant folded. * + * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP + * handles the integer case, converting to and from floating point so that + * RCP is possible. + * * EXP_TO_EXP2 and LOG_TO_LOG2: * ---------------------------- * Many GPUs don't have a base e log or exponent instruction, but they @@ -95,6 +100,7 @@ private: void sub_to_add_neg(ir_expression *); void div_to_mul_rcp(ir_expression *); + void int_div_to_mul_rcp(ir_expression *); void mod_to_fract(ir_expression *); void exp_to_exp2(ir_expression *); void pow_to_exp2(ir_expression *); @@ -127,60 +133,67 @@ lower_instructions_visitor::sub_to_add_neg(ir_expression *ir) void lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir) { - if (!ir->operands[1]->type->is_integer()) { - /* New expression for the 1.0 / op1 */ - ir_rvalue *expr; - expr = new(ir) ir_expression(ir_unop_rcp, - ir->operands[1]->type, - ir->operands[1], - NULL); - - /* op0 / op1 -> op0 * (1.0 / op1) */ - ir->operation = ir_binop_mul; - ir->operands[1] = expr; + assert(ir->operands[1]->type->is_float()); + + /* New expression for the 1.0 / op1 */ + ir_rvalue *expr; + expr = new(ir) ir_expression(ir_unop_rcp, + ir->operands[1]->type, + ir->operands[1]); + + /* op0 / op1 -> op0 * (1.0 / op1) */ + ir->operation = ir_binop_mul; + ir->operands[1] = expr; + + this->progress = true; +} + +void +lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir) +{ + assert(ir->operands[1]->type->is_integer()); + + /* Be careful with integer division -- we need to do it as a + * float and re-truncate, since rcp(n > 1) of an integer would + * just be 0. + */ + ir_rvalue *op0, *op1; + const struct glsl_type *vec_type; + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->operands[1]->type->vector_elements, + ir->operands[1]->type->matrix_columns); + + if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) + op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL); + else + op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL); + + op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL); + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->operands[0]->type->vector_elements, + ir->operands[0]->type->matrix_columns); + + if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) + op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL); + else + op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL); + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->type->vector_elements, + ir->type->matrix_columns); + + op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); + + if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) { + ir->operation = ir_unop_f2i; + ir->operands[0] = op0; } else { - /* Be careful with integer division -- we need to do it as a - * float and re-truncate, since rcp(n > 1) of an integer would - * just be 0. - */ - ir_rvalue *op0, *op1; - const struct glsl_type *vec_type; - - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, - ir->operands[1]->type->vector_elements, - ir->operands[1]->type->matrix_columns); - - if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) - op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL); - else - op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL); - - op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL); - - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, - ir->operands[0]->type->vector_elements, - ir->operands[0]->type->matrix_columns); - - if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) - op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL); - else - op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL); - - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, - ir->type->vector_elements, - ir->type->matrix_columns); - - op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); - - if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) { - ir->operation = ir_unop_f2i; - ir->operands[0] = op0; - } else { - ir->operation = ir_unop_i2u; - ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0); - } - ir->operands[1] = NULL; + ir->operation = ir_unop_i2u; + ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0); } + ir->operands[1] = NULL; this->progress = true; } @@ -265,7 +278,9 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) break; case ir_binop_div: - if (lowering(DIV_TO_MUL_RCP)) + if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP)) + int_div_to_mul_rcp(ir); + else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP)) div_to_mul_rcp(ir); break; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 0a21094708b..a6ed810a16e 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -100,6 +100,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) lower_instructions(shader->ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | + INT_DIV_TO_MUL_RCP | SUB_TO_ADD_NEG | EXP_TO_EXP2 | LOG_TO_LOG2); diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 6820e4c6ba7..dd154db8b03 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -3232,7 +3232,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) /* Lowering */ do_mat_op_to_vec(ir); lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 - | LOG_TO_LOG2 + | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3fbb0cdd248..98bea69b959 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4982,7 +4982,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) /* Lowering */ do_mat_op_to_vec(ir); lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 - | LOG_TO_LOG2 + | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; |