diff options
-rw-r--r-- | src/glsl/Makefile | 1 | ||||
-rw-r--r-- | src/glsl/ir.cpp | 25 | ||||
-rw-r--r-- | src/glsl/ir.h | 14 | ||||
-rw-r--r-- | src/glsl/ir_clone.cpp | 3 | ||||
-rw-r--r-- | src/glsl/ir_constant_expression.cpp | 18 | ||||
-rw-r--r-- | src/glsl/ir_optimization.h | 1 | ||||
-rw-r--r-- | src/glsl/ir_validate.cpp | 45 | ||||
-rw-r--r-- | src/glsl/lower_vector.cpp | 224 | ||||
-rw-r--r-- | src/glsl/opt_algebraic.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 1 | ||||
-rw-r--r-- | src/mesa/program/ir_to_mesa.cpp | 132 |
11 files changed, 460 insertions, 6 deletions
diff --git a/src/glsl/Makefile b/src/glsl/Makefile index 62984f81c0a..ea4d6be5a41 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -64,6 +64,7 @@ CXX_SOURCES = \ lower_variable_index_to_cond_assign.cpp \ lower_vec_index_to_cond_assign.cpp \ lower_vec_index_to_swizzle.cpp \ + lower_vector.cpp \ opt_algebraic.cpp \ opt_constant_folding.cpp \ opt_constant_propagation.cpp \ diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 1f5e2ebdcb7..741e3cb1775 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -200,18 +200,35 @@ ir_expression::ir_expression(int op, const struct glsl_type *type, this->operation = ir_expression_operation(op); this->operands[0] = op0; this->operands[1] = NULL; + this->operands[2] = NULL; + this->operands[3] = NULL; } ir_expression::ir_expression(int op, const struct glsl_type *type, ir_rvalue *op0, ir_rvalue *op1) { - assert((op1 == NULL) && (get_num_operands(ir_expression_operation(op)) == 1) + assert(((op1 == NULL) && (get_num_operands(ir_expression_operation(op)) == 1)) || (get_num_operands(ir_expression_operation(op)) == 2)); this->ir_type = ir_type_expression; this->type = type; this->operation = ir_expression_operation(op); this->operands[0] = op0; this->operands[1] = op1; + this->operands[2] = NULL; + this->operands[3] = NULL; +} + +ir_expression::ir_expression(int op, const struct glsl_type *type, + ir_rvalue *op0, ir_rvalue *op1, + ir_rvalue *op2, ir_rvalue *op3) +{ + this->ir_type = ir_type_expression; + this->type = type; + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = op1; + this->operands[2] = op2; + this->operands[3] = op3; } unsigned int @@ -225,6 +242,9 @@ ir_expression::get_num_operands(ir_expression_operation op) if (op <= ir_last_binop) return 2; + if (op == ir_quadop_vector) + return 4; + assert(false); return 0; } @@ -287,12 +307,13 @@ static const char *const operator_strs[] = { "min", "max", "pow", + "vector", }; const char *ir_expression::operator_string(ir_expression_operation op) { assert((unsigned int) op < Elements(operator_strs)); - assert(Elements(operator_strs) == (ir_binop_pow + 1)); + assert(Elements(operator_strs) == (ir_quadop_vector + 1)); return operator_strs[op]; } diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 99fdaa3b09b..be0da07b3b6 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -33,6 +33,7 @@ extern "C" { #include <talloc.h> } +#include "glsl_types.h" #include "list.h" #include "ir_visitor.h" #include "ir_hierarchical_visitor.h" @@ -824,6 +825,8 @@ enum ir_expression_operation { */ ir_last_binop = ir_binop_pow, + ir_quadop_vector, + /** * A sentinel marking the last of all operations. */ @@ -843,6 +846,12 @@ public: ir_expression(int op, const struct glsl_type *type, ir_rvalue *, ir_rvalue *); + /** + * Constructor for quad operator expressions + */ + ir_expression(int op, const struct glsl_type *type, + ir_rvalue *, ir_rvalue *, ir_rvalue *, ir_rvalue *); + virtual ir_expression *as_expression() { return this; @@ -868,7 +877,8 @@ public: */ unsigned int get_num_operands() const { - return get_num_operands(operation); + return (this->operation == ir_quadop_vector) + ? this->type->vector_elements : get_num_operands(operation); } /** @@ -895,7 +905,7 @@ public: virtual ir_visitor_status accept(ir_hierarchical_visitor *); ir_expression_operation operation; - ir_rvalue *operands[2]; + ir_rvalue *operands[4]; }; diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp index 4032647c3a2..325f6066154 100644 --- a/src/glsl/ir_clone.cpp +++ b/src/glsl/ir_clone.cpp @@ -168,7 +168,8 @@ ir_expression::clone(void *mem_ctx, struct hash_table *ht) const op[i] = this->operands[i]->clone(mem_ctx, ht); } - return new(mem_ctx) ir_expression(this->operation, this->type, op[0], op[1]); + return new(mem_ctx) ir_expression(this->operation, this->type, + op[0], op[1], op[2], op[3]); } ir_dereference_variable * diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index 1fe15050470..4fd6d09a3af 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -788,6 +788,24 @@ ir_expression::constant_expression_value() } break; + case ir_quadop_vector: + for (unsigned c = 0; c < this->type->vector_elements; c++) { + switch (this->type->base_type) { + case GLSL_TYPE_INT: + data.i[c] = op[c]->value.i[0]; + break; + case GLSL_TYPE_UINT: + data.u[c] = op[c]->value.u[0]; + break; + case GLSL_TYPE_FLOAT: + data.f[c] = op[c]->value.f[0]; + break; + default: + assert(0); + } + } + break; + default: /* FINISHME: Should handle all expression types. */ return NULL; diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index ffdc66b9f75..1f8da16bcb2 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -59,4 +59,5 @@ bool do_vec_index_to_swizzle(exec_list *instructions); bool lower_noise(exec_list *instructions); bool lower_variable_index_to_cond_assign(exec_list *instructions, bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform); +bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); bool optimize_redundant_jumps(exec_list *instructions); diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index 2a066c1a277..5b055f64d38 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -374,6 +374,51 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type->is_vector()); assert(ir->operands[0]->type == ir->operands[1]->type); break; + + case ir_quadop_vector: + /* The vector operator collects some number of scalars and generates a + * vector from them. + * + * - All of the operands must be scalar. + * - Number of operands must matche the size of the resulting vector. + * - Base type of the operands must match the base type of the result. + */ + assert(ir->type->is_vector()); + switch (ir->type->vector_elements) { + case 2: + assert(ir->operands[0]->type->is_scalar()); + assert(ir->operands[0]->type->base_type == ir->type->base_type); + assert(ir->operands[1]->type->is_scalar()); + assert(ir->operands[1]->type->base_type == ir->type->base_type); + assert(ir->operands[2] == NULL); + assert(ir->operands[3] == NULL); + break; + case 3: + assert(ir->operands[0]->type->is_scalar()); + assert(ir->operands[0]->type->base_type == ir->type->base_type); + assert(ir->operands[1]->type->is_scalar()); + assert(ir->operands[1]->type->base_type == ir->type->base_type); + assert(ir->operands[2]->type->is_scalar()); + assert(ir->operands[2]->type->base_type == ir->type->base_type); + assert(ir->operands[3] == NULL); + break; + case 4: + assert(ir->operands[0]->type->is_scalar()); + assert(ir->operands[0]->type->base_type == ir->type->base_type); + assert(ir->operands[1]->type->is_scalar()); + assert(ir->operands[1]->type->base_type == ir->type->base_type); + assert(ir->operands[2]->type->is_scalar()); + assert(ir->operands[2]->type->base_type == ir->type->base_type); + assert(ir->operands[3]->type->is_scalar()); + assert(ir->operands[3]->type->base_type == ir->type->base_type); + break; + default: + /* The is_vector assertion above should prevent execution from ever + * getting here. + */ + assert(!"Should not get here."); + break; + } } return visit_continue; diff --git a/src/glsl/lower_vector.cpp b/src/glsl/lower_vector.cpp new file mode 100644 index 00000000000..ae501201004 --- /dev/null +++ b/src/glsl/lower_vector.cpp @@ -0,0 +1,224 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_vector.cpp + * IR lowering pass to remove some types of ir_quadop_vector + * + * \author Ian Romanick <[email protected]> + */ + +#include "ir.h" +#include "ir_rvalue_visitor.h" + +class lower_vector_visitor : public ir_rvalue_visitor { +public: + lower_vector_visitor() : progress(false) + { + /* empty */ + } + + void handle_rvalue(ir_rvalue **rvalue); + + /** + * Should SWZ-like expressions be lowered? + */ + bool dont_lower_swz; + + bool progress; +}; + +/** + * Determine if an IR expression tree looks like an extended swizzle + * + * Extended swizzles consist of access of a single vector source (with possible + * per component negation) and the constants -1, 0, or 1. + */ +bool +is_extended_swizzle(ir_expression *ir) +{ + /* Track any variables that are accessed by this expression. + */ + ir_variable *var = NULL; + + assert(ir->operation == ir_quadop_vector); + + for (unsigned i = 0; i < ir->type->vector_elements; i++) { + ir_rvalue *op = ir->operands[i]; + + while (op != NULL) { + switch (op->ir_type) { + case ir_type_constant: { + const ir_constant *const c = op->as_constant(); + + if (!c->is_one() && !c->is_zero() && !c->is_negative_one()) + return false; + + op = NULL; + break; + } + + case ir_type_dereference_variable: { + ir_dereference_variable *const d = (ir_dereference_variable *) op; + + if ((var != NULL) && (var != d->var)) + return false; + + var = d->var; + op = NULL; + break; + } + + case ir_type_expression: { + ir_expression *const ex = (ir_expression *) op; + + if (ex->operation != ir_unop_neg) + return false; + + op = ex->operands[0]; + break; + } + + case ir_type_swizzle: + op = ((ir_swizzle *) op)->val; + break; + + default: + return false; + } + } + } + + return true; +} + +void +lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if ((expr == NULL) || (expr->operation != ir_quadop_vector)) + return; + + if (this->dont_lower_swz && is_extended_swizzle(expr)) + return; + + /* FINISHME: Is this the right thing to use for the talloc context? + */ + void *const mem_ctx = expr; + + assert(expr->type->vector_elements == expr->get_num_operands()); + + /* Generate a temporary with the same type as the ir_quadop_operation. + */ + ir_variable *const temp = + new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary); + + this->base_ir->insert_before(temp); + + /* Counter of the number of components collected so far. + */ + unsigned assigned; + + /* Write-mask in the destination that receives counted by 'assigned'. + */ + unsigned write_mask; + + + /* Generate upto four assignments to that variable. Try to group component + * assignments together: + * + * - All constant components can be assigned at once. + * - All assigments of components from a single variable with the same + * unary operator can be assigned at once. + */ + ir_constant_data d = { { 0 } }; + + assigned = 0; + write_mask = 0; + for (unsigned i = 0; i < expr->type->vector_elements; i++) { + const ir_constant *const c = expr->operands[i]->as_constant(); + + if (c == NULL) + continue; + + switch (expr->type->base_type) { + case GLSL_TYPE_UINT: d.u[assigned] = c->value.u[0]; break; + case GLSL_TYPE_INT: d.i[assigned] = c->value.i[0]; break; + case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break; + case GLSL_TYPE_BOOL: d.b[assigned] = c->value.b[0]; break; + defatul: assert(!"Should not get here."); break; + } + + write_mask |= (1U << i); + assigned++; + } + + assert((write_mask == 0) == (assigned == 0)); + + /* If there were constant values, generate an assignment. + */ + if (assigned > 0) { + ir_constant *const c = + new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type, + assigned, 0), + &d); + ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp); + ir_assignment *const assign = + new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask); + + this->base_ir->insert_before(assign); + } + + /* FINISHME: This should try to coalesce assignments. + */ + for (unsigned i = 0; i < expr->type->vector_elements; i++) { + if (expr->operands[i]->ir_type == ir_type_constant) + continue; + + ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp); + ir_assignment *const assign = + new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i)); + + this->base_ir->insert_before(assign); + assigned++; + } + + assert(assigned == expr->type->vector_elements); + + *rvalue = new(mem_ctx) ir_dereference_variable(temp); + this->progress = true; +} + +bool +lower_quadop_vector(exec_list *instructions, bool dont_lower_swz) +{ + lower_vector_visitor v; + + v.dont_lower_swz = dont_lower_swz; + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 9a8080bff3d..3c9af85f312 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -394,7 +394,7 @@ ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue) return; ir_expression *expr = (*rvalue)->as_expression(); - if (!expr) + if (!expr || expr->operation == ir_quadop_vector) return; *rvalue = handle_expression(expr); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 105327c7fe7..e1cb94452d4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -130,6 +130,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) GL_TRUE, /* temp */ GL_TRUE /* uniform */ ) || progress; + progress = lower_quadop_vector(shader->ir, false) || progress; } while (progress); validate_ir_tree(shader->ir); diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1b5337e92ee..1cb81830426 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -289,6 +289,8 @@ public: GLboolean try_emit_mad(ir_expression *ir, int mul_operand); + void emit_swz(ir_expression *ir); + bool process_move_condition(ir_rvalue *ir); void *mem_ctx; @@ -958,6 +960,123 @@ ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, } void +ir_to_mesa_visitor::emit_swz(ir_expression *ir) +{ + /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. + * This means that each of the operands is either an immediate value of -1, + * 0, or 1, or is a component from one source register (possibly with + * negation). + */ + uint8_t components[4] = { 0 }; + bool negate[4] = { false }; + ir_variable *var = NULL; + + for (unsigned i = 0; i < ir->type->vector_elements; i++) { + ir_rvalue *op = ir->operands[i]; + + assert(op->type->is_scalar()); + + while (op != NULL) { + switch (op->ir_type) { + case ir_type_constant: { + + assert(op->type->is_scalar()); + + const ir_constant *const c = op->as_constant(); + if (c->is_one()) { + components[i] = SWIZZLE_ONE; + } else if (c->is_zero()) { + components[i] = SWIZZLE_ZERO; + } else if (c->is_negative_one()) { + components[i] = SWIZZLE_ONE; + negate[i] = true; + } else { + assert(!"SWZ constant must be 0.0 or 1.0."); + } + + op = NULL; + break; + } + + case ir_type_dereference_variable: { + ir_dereference_variable *const deref = + (ir_dereference_variable *) op; + + assert((var == NULL) || (deref->var == var)); + components[i] = SWIZZLE_X; + var = deref->var; + op = NULL; + break; + } + + case ir_type_expression: { + ir_expression *const expr = (ir_expression *) op; + + assert(expr->operation == ir_unop_neg); + negate[i] = true; + + op = expr->operands[0]; + break; + } + + case ir_type_swizzle: { + ir_swizzle *const swiz = (ir_swizzle *) op; + + components[i] = swiz->mask.x; + op = swiz->val; + break; + } + + default: + assert(!"Should not get here."); + return; + } + } + } + + assert(var != NULL); + + ir_dereference_variable *const deref = + new(mem_ctx) ir_dereference_variable(var); + + this->result.file = PROGRAM_UNDEFINED; + deref->accept(this); + if (this->result.file == PROGRAM_UNDEFINED) { + ir_print_visitor v; + printf("Failed to get tree for expression operand:\n"); + deref->accept(&v); + exit(1); + } + + ir_to_mesa_src_reg src; + + src = this->result; + src.swizzle = MAKE_SWIZZLE4(components[0], + components[1], + components[2], + components[3]); + src.negate = ((unsigned(negate[0]) << 0) + | (unsigned(negate[1]) << 1) + | (unsigned(negate[2]) << 2) + | (unsigned(negate[3]) << 3)); + + /* Storage for our result. Ideally for an assignment we'd be using the + * actual storage for the result here, instead. + */ + const ir_to_mesa_src_reg result_src = get_temp(ir->type); + ir_to_mesa_dst_reg result_dst = ir_to_mesa_dst_reg_from_src(result_src); + + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + ir_to_mesa_emit_op1(ir, OPCODE_SWZ, result_dst, src); + this->result = result_src; +} + +void ir_to_mesa_visitor::visit(ir_expression *ir) { unsigned int operand; @@ -974,6 +1093,11 @@ ir_to_mesa_visitor::visit(ir_expression *ir) return; } + if (ir->operation == ir_quadop_vector) { + this->emit_swz(ir); + return; + } + for (operand = 0; operand < ir->get_num_operands(); operand++) { this->result.file = PROGRAM_UNDEFINED; ir->operands[operand]->accept(this); @@ -1231,6 +1355,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir) case ir_unop_round_even: assert(!"GLSL 1.30 features unsupported"); break; + + case ir_quadop_vector: + /* This operation should have already been handled. + */ + assert(!"Should not get here."); + break; } this->result = result_src; @@ -2676,6 +2806,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; + progress = lower_quadop_vector(ir, true) || progress; + if (options->EmitNoIfs) progress = do_if_to_cond_assign(ir) || progress; |