11 files changed, 460 insertions, 6 deletions
diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 62984f81c0a..ea4d6be5a41 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -64,6 +64,7 @@ CXX_SOURCES = \
 	lower_variable_index_to_cond_assign.cpp \
 	lower_vec_index_to_cond_assign.cpp \
 	lower_vec_index_to_swizzle.cpp \
+	lower_vector.cpp \
 	opt_algebraic.cpp \
 	opt_constant_folding.cpp \
 	opt_constant_propagation.cpp \
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 1f5e2ebdcb7..741e3cb1775 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -200,18 +200,35 @@ ir_expression::ir_expression(int op, const struct glsl_type *type,
    this->operation = ir_expression_operation(op);
    this->operands[0] = op0;
    this->operands[1] = NULL;
+   this->operands[2] = NULL;
+   this->operands[3] = NULL;
 }
 
 ir_expression::ir_expression(int op, const struct glsl_type *type,
 			     ir_rvalue *op0, ir_rvalue *op1)
 {
-   assert((op1 == NULL) && (get_num_operands(ir_expression_operation(op)) == 1)
+   assert(((op1 == NULL) && (get_num_operands(ir_expression_operation(op)) == 1))
 	  || (get_num_operands(ir_expression_operation(op)) == 2));
    this->ir_type = ir_type_expression;
    this->type = type;
    this->operation = ir_expression_operation(op);
    this->operands[0] = op0;
    this->operands[1] = op1;
+   this->operands[2] = NULL;
+   this->operands[3] = NULL;
+}
+
+ir_expression::ir_expression(int op, const struct glsl_type *type,
+			     ir_rvalue *op0, ir_rvalue *op1,
+			     ir_rvalue *op2, ir_rvalue *op3)
+{
+   this->ir_type = ir_type_expression;
+   this->type = type;
+   this->operation = ir_expression_operation(op);
+   this->operands[0] = op0;
+   this->operands[1] = op1;
+   this->operands[2] = op2;
+   this->operands[3] = op3;
 }
 
 unsigned int
@@ -225,6 +242,9 @@ ir_expression::get_num_operands(ir_expression_operation op)
    if (op <= ir_last_binop)
       return 2;
 
+   if (op == ir_quadop_vector)
+      return 4;
+
    assert(false);
    return 0;
 }
@@ -287,12 +307,13 @@ static const char *const operator_strs[] = {
    "min",
    "max",
    "pow",
+   "vector",
 };
 
 const char *ir_expression::operator_string(ir_expression_operation op)
 {
    assert((unsigned int) op < Elements(operator_strs));
-   assert(Elements(operator_strs) == (ir_binop_pow + 1));
+   assert(Elements(operator_strs) == (ir_quadop_vector + 1));
    return operator_strs[op];
 }
 
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 99fdaa3b09b..be0da07b3b6 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -33,6 +33,7 @@ extern "C" {
 #include <talloc.h>
 }
 
+#include "glsl_types.h"
 #include "list.h"
 #include "ir_visitor.h"
 #include "ir_hierarchical_visitor.h"
@@ -824,6 +825,8 @@ enum ir_expression_operation {
     */
    ir_last_binop = ir_binop_pow,
 
+   ir_quadop_vector,
+
    /**
     * A sentinel marking the last of all operations.
     */
@@ -843,6 +846,12 @@ public:
    ir_expression(int op, const struct glsl_type *type,
 		 ir_rvalue *, ir_rvalue *);
 
+   /**
+    * Constructor for quad operator expressions
+    */
+   ir_expression(int op, const struct glsl_type *type,
+		 ir_rvalue *, ir_rvalue *, ir_rvalue *, ir_rvalue *);
+
    virtual ir_expression *as_expression()
    {
       return this;
@@ -868,7 +877,8 @@ public:
     */
    unsigned int get_num_operands() const
    {
-      return get_num_operands(operation);
+      return (this->operation == ir_quadop_vector)
+	 ? this->type->vector_elements : get_num_operands(operation);
    }
 
    /**
@@ -895,7 +905,7 @@ public:
    virtual ir_visitor_status accept(ir_hierarchical_visitor *);
 
    ir_expression_operation operation;
-   ir_rvalue *operands[2];
+   ir_rvalue *operands[4];
 };
 
 
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index 4032647c3a2..325f6066154 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -168,7 +168,8 @@ ir_expression::clone(void *mem_ctx, struct hash_table *ht) const
       op[i] = this->operands[i]->clone(mem_ctx, ht);
    }
 
-   return new(mem_ctx) ir_expression(this->operation, this->type, op[0], op[1]);
+   return new(mem_ctx) ir_expression(this->operation, this->type,
+				     op[0], op[1], op[2], op[3]);
 }
 
 ir_dereference_variable *
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 1fe15050470..4fd6d09a3af 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -788,6 +788,24 @@ ir_expression::constant_expression_value()
       }
       break;
 
+   case ir_quadop_vector:
+      for (unsigned c = 0; c < this->type->vector_elements; c++) {
+	 switch (this->type->base_type) {
+	 case GLSL_TYPE_INT:
+	    data.i[c] = op[c]->value.i[0];
+	    break;
+	 case GLSL_TYPE_UINT:
+	    data.u[c] = op[c]->value.u[0];
+	    break;
+	 case GLSL_TYPE_FLOAT:
+	    data.f[c] = op[c]->value.f[0];
+	    break;
+	 default:
+	    assert(0);
+	 }
+      }
+      break;
+
    default:
       /* FINISHME: Should handle all expression types. */
       return NULL;
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index ffdc66b9f75..1f8da16bcb2 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -59,4 +59,5 @@ bool do_vec_index_to_swizzle(exec_list *instructions);
 bool lower_noise(exec_list *instructions);
 bool lower_variable_index_to_cond_assign(exec_list *instructions,
     bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
+bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
 bool optimize_redundant_jumps(exec_list *instructions);
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 2a066c1a277..5b055f64d38 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -374,6 +374,51 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[0]->type->is_vector());
       assert(ir->operands[0]->type == ir->operands[1]->type);
       break;
+
+   case ir_quadop_vector:
+      /* The vector operator collects some number of scalars and generates a
+       * vector from them.
+       *
+       *  - All of the operands must be scalar.
+       *  - Number of operands must matche the size of the resulting vector.
+       *  - Base type of the operands must match the base type of the result.
+       */
+      assert(ir->type->is_vector());
+      switch (ir->type->vector_elements) {
+      case 2:
+	 assert(ir->operands[0]->type->is_scalar());
+	 assert(ir->operands[0]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[1]->type->is_scalar());
+	 assert(ir->operands[1]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[2] == NULL);
+	 assert(ir->operands[3] == NULL);
+	 break;
+      case 3:
+	 assert(ir->operands[0]->type->is_scalar());
+	 assert(ir->operands[0]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[1]->type->is_scalar());
+	 assert(ir->operands[1]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[2]->type->is_scalar());
+	 assert(ir->operands[2]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[3] == NULL);
+	 break;
+      case 4:
+	 assert(ir->operands[0]->type->is_scalar());
+	 assert(ir->operands[0]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[1]->type->is_scalar());
+	 assert(ir->operands[1]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[2]->type->is_scalar());
+	 assert(ir->operands[2]->type->base_type == ir->type->base_type);
+	 assert(ir->operands[3]->type->is_scalar());
+	 assert(ir->operands[3]->type->base_type == ir->type->base_type);
+	 break;
+      default:
+	 /* The is_vector assertion above should prevent execution from ever
+	  * getting here.
+	  */
+	 assert(!"Should not get here.");
+	 break;
+      }
    }
 
    return visit_continue;
diff --git a/src/glsl/lower_vector.cpp b/src/glsl/lower_vector.cpp
new file mode 100644
index 00000000000..ae501201004
--- /dev/null
+++ b/src/glsl/lower_vector.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_vector.cpp
+ * IR lowering pass to remove some types of ir_quadop_vector
+ *
+ * \author Ian Romanick <[email protected]>
+ */
+
+#include "ir.h"
+#include "ir_rvalue_visitor.h"
+
+class lower_vector_visitor : public ir_rvalue_visitor {
+public:
+   lower_vector_visitor() : progress(false)
+   {
+      /* empty */
+   }
+
+   void handle_rvalue(ir_rvalue **rvalue);
+
+   /**
+    * Should SWZ-like expressions be lowered?
+    */
+   bool dont_lower_swz;
+
+   bool progress;
+};
+
+/**
+ * Determine if an IR expression tree looks like an extended swizzle
+ *
+ * Extended swizzles consist of access of a single vector source (with possible
+ * per component negation) and the constants -1, 0, or 1.
+ */
+bool
+is_extended_swizzle(ir_expression *ir)
+{
+   /* Track any variables that are accessed by this expression.
+    */
+   ir_variable *var = NULL;
+
+   assert(ir->operation == ir_quadop_vector);
+
+   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
+      ir_rvalue *op = ir->operands[i];
+
+      while (op != NULL) {
+	 switch (op->ir_type) {
+	 case ir_type_constant: {
+	    const ir_constant *const c = op->as_constant();
+
+	    if (!c->is_one() && !c->is_zero() && !c->is_negative_one())
+	       return false;
+
+	    op = NULL;
+	    break;
+	 }
+
+	 case ir_type_dereference_variable: {
+	    ir_dereference_variable *const d = (ir_dereference_variable *) op;
+
+	    if ((var != NULL) && (var != d->var))
+	       return false;
+
+	    var = d->var;
+	    op = NULL;
+	    break;
+	 }
+
+	 case ir_type_expression: {
+	    ir_expression *const ex = (ir_expression *) op;
+
+	    if (ex->operation != ir_unop_neg)
+	       return false;
+
+	    op = ex->operands[0];
+	    break;
+	 }
+
+	 case ir_type_swizzle:
+	    op = ((ir_swizzle *) op)->val;
+	    break;
+
+	 default:
+	    return false;
+	 }
+      }
+   }
+
+   return true;
+}
+
+void
+lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (!*rvalue)
+      return;
+
+   ir_expression *expr = (*rvalue)->as_expression();
+   if ((expr == NULL) || (expr->operation != ir_quadop_vector))
+      return;
+
+   if (this->dont_lower_swz && is_extended_swizzle(expr))
+      return;
+
+   /* FINISHME: Is this the right thing to use for the talloc context?
+    */
+   void *const mem_ctx = expr;
+
+   assert(expr->type->vector_elements == expr->get_num_operands());
+
+   /* Generate a temporary with the same type as the ir_quadop_operation.
+    */
+   ir_variable *const temp =
+      new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary);
+
+   this->base_ir->insert_before(temp);
+
+   /* Counter of the number of components collected so far.
+    */
+   unsigned assigned;
+
+   /* Write-mask in the destination that receives counted by 'assigned'.
+    */
+   unsigned write_mask;
+
+
+   /* Generate upto four assignments to that variable.  Try to group component
+    * assignments together:
+    *
+    * - All constant components can be assigned at once.
+    * - All assigments of components from a single variable with the same
+    *   unary operator can be assigned at once.
+    */
+   ir_constant_data d = { { 0 } };
+
+   assigned = 0;
+   write_mask = 0;
+   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
+      const ir_constant *const c = expr->operands[i]->as_constant();
+
+      if (c == NULL)
+	 continue;
+
+      switch (expr->type->base_type) {
+      case GLSL_TYPE_UINT:  d.u[assigned] = c->value.u[0]; break;
+      case GLSL_TYPE_INT:   d.i[assigned] = c->value.i[0]; break;
+      case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break;
+      case GLSL_TYPE_BOOL:  d.b[assigned] = c->value.b[0]; break;
+      defatul:              assert(!"Should not get here."); break;
+      }
+
+      write_mask |= (1U << i);
+      assigned++;
+   }
+
+   assert((write_mask == 0) == (assigned == 0));
+
+   /* If there were constant values, generate an assignment.
+    */
+   if (assigned > 0) {
+      ir_constant *const c =
+	 new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type,
+							  assigned, 0),
+				  &d);
+      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
+      ir_assignment *const assign =
+	 new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask);
+
+      this->base_ir->insert_before(assign);
+   }
+
+   /* FINISHME: This should try to coalesce assignments.
+    */
+   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
+      if (expr->operands[i]->ir_type == ir_type_constant)
+	 continue;
+
+      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
+      ir_assignment *const assign =
+	 new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i));
+
+      this->base_ir->insert_before(assign);
+      assigned++;
+   }
+
+   assert(assigned == expr->type->vector_elements);
+
+   *rvalue = new(mem_ctx) ir_dereference_variable(temp);
+   this->progress = true;
+}
+
+bool
+lower_quadop_vector(exec_list *instructions, bool dont_lower_swz)
+{
+   lower_vector_visitor v;
+
+   v.dont_lower_swz = dont_lower_swz;
+   visit_list_elements(&v, instructions);
+
+   return v.progress;
+}
diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index 9a8080bff3d..3c9af85f312 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -394,7 +394,7 @@ ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
       return;
 
    ir_expression *expr = (*rvalue)->as_expression();
-   if (!expr)
+   if (!expr || expr->operation == ir_quadop_vector)
       return;
 
    *rvalue = handle_expression(expr);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 105327c7fe7..e1cb94452d4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -130,6 +130,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 						GL_TRUE, /* temp */
 						GL_TRUE /* uniform */
 						) || progress;
+	 progress = lower_quadop_vector(shader->ir, false) || progress;
       } while (progress);
 
       validate_ir_tree(shader->ir);
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1b5337e92ee..1cb81830426 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -289,6 +289,8 @@ public:
    GLboolean try_emit_mad(ir_expression *ir,
 			  int mul_operand);
 
+   void emit_swz(ir_expression *ir);
+
    bool process_move_condition(ir_rvalue *ir);
 
    void *mem_ctx;
@@ -958,6 +960,123 @@ ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
 }
 
 void
+ir_to_mesa_visitor::emit_swz(ir_expression *ir)
+{
+   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
+    * This means that each of the operands is either an immediate value of -1,
+    * 0, or 1, or is a component from one source register (possibly with
+    * negation).
+    */
+   uint8_t components[4] = { 0 };
+   bool negate[4] = { false };
+   ir_variable *var = NULL;
+
+   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
+      ir_rvalue *op = ir->operands[i];
+
+      assert(op->type->is_scalar());
+
+      while (op != NULL) {
+	 switch (op->ir_type) {
+	 case ir_type_constant: {
+
+	    assert(op->type->is_scalar());
+
+	    const ir_constant *const c = op->as_constant();
+	    if (c->is_one()) {
+	       components[i] = SWIZZLE_ONE;
+	    } else if (c->is_zero()) {
+	       components[i] = SWIZZLE_ZERO;
+	    } else if (c->is_negative_one()) {
+	       components[i] = SWIZZLE_ONE;
+	       negate[i] = true;
+	    } else {
+	       assert(!"SWZ constant must be 0.0 or 1.0.");
+	    }
+
+	    op = NULL;
+	    break;
+	 }
+
+	 case ir_type_dereference_variable: {
+	    ir_dereference_variable *const deref =
+	       (ir_dereference_variable *) op;
+
+	    assert((var == NULL) || (deref->var == var));
+	    components[i] = SWIZZLE_X;
+	    var = deref->var;
+	    op = NULL;
+	    break;
+	 }
+
+	 case ir_type_expression: {
+	    ir_expression *const expr = (ir_expression *) op;
+
+	    assert(expr->operation == ir_unop_neg);
+	    negate[i] = true;
+
+	    op = expr->operands[0];
+	    break;
+	 }
+
+	 case ir_type_swizzle: {
+	    ir_swizzle *const swiz = (ir_swizzle *) op;
+
+	    components[i] = swiz->mask.x;
+	    op = swiz->val;
+	    break;
+	 }
+
+	 default:
+	    assert(!"Should not get here.");
+	    return;
+	 }
+      }
+   }
+
+   assert(var != NULL);
+
+   ir_dereference_variable *const deref =
+      new(mem_ctx) ir_dereference_variable(var);
+
+   this->result.file = PROGRAM_UNDEFINED;
+   deref->accept(this);
+   if (this->result.file == PROGRAM_UNDEFINED) {
+      ir_print_visitor v;
+      printf("Failed to get tree for expression operand:\n");
+      deref->accept(&v);
+      exit(1);
+   }
+
+   ir_to_mesa_src_reg src;
+
+   src = this->result;
+   src.swizzle = MAKE_SWIZZLE4(components[0],
+			       components[1],
+			       components[2],
+			       components[3]);
+   src.negate = ((unsigned(negate[0]) << 0)
+		 | (unsigned(negate[1]) << 1)
+		 | (unsigned(negate[2]) << 2)
+		 | (unsigned(negate[3]) << 3));
+
+   /* Storage for our result.  Ideally for an assignment we'd be using the
+    * actual storage for the result here, instead.
+    */
+   const ir_to_mesa_src_reg result_src = get_temp(ir->type);
+   ir_to_mesa_dst_reg result_dst = ir_to_mesa_dst_reg_from_src(result_src);
+
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   ir_to_mesa_emit_op1(ir, OPCODE_SWZ, result_dst, src);
+   this->result = result_src;
+}
+
+void
 ir_to_mesa_visitor::visit(ir_expression *ir)
 {
    unsigned int operand;
@@ -974,6 +1093,11 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 	 return;
    }
 
+   if (ir->operation == ir_quadop_vector) {
+      this->emit_swz(ir);
+      return;
+   }
+
    for (operand = 0; operand < ir->get_num_operands(); operand++) {
       this->result.file = PROGRAM_UNDEFINED;
       ir->operands[operand]->accept(this);
@@ -1231,6 +1355,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
    case ir_unop_round_even:
       assert(!"GLSL 1.30 features unsupported");
       break;
+
+   case ir_quadop_vector:
+      /* This operation should have already been handled.
+       */
+      assert(!"Should not get here.");
+      break;
    }
 
    this->result = result_src;
@@ -2676,6 +2806,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
 	 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
 
+	 progress = lower_quadop_vector(ir, true) || progress;
+
 	 if (options->EmitNoIfs)
 	    progress = do_if_to_cond_assign(ir) || progress;