Diffstat (limited to 'src/glsl')
42 files changed, 2131 insertions, 740 deletions
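Beyond housekeeping (the Android.gen.mk include path, moving builtin_type_macros.h into the NIR file list, the new nir/nir_clone.c), the bulk of this diff reworks how GLSL layout-qualifier values are stored and validated: binding, location, index, stream, offset, max_vertices, invocations, vertices, and local_size_* change from plain integers in ast_type_qualifier to unresolved AST expressions (ast_expression * or the new ast_layout_expression *). Instead of comparing integer values eagerly in merge_qualifier(), repeated declarations now append their expressions to a single ast_layout_expression, and ast_to_hir folds everything to constants via process_qualifier_constant(), reporting negative, out-of-range, and mismatched values at that point. Sketches of the compute local-size validation and the new dual-source blending checks follow the diff; first, a minimal standalone model of the collect-then-fold pattern (the class below is an illustrative stand-in, not Mesa's actual type):

#include <cstdio>
#include <functional>
#include <vector>

// Toy stand-in for ast_layout_expression: collects every expression written
// for one qualifier and validates them together once they can be folded.
class layout_expression {
public:
   void append(std::function<int()> expr) { exprs.push_back(std::move(expr)); }

   // Mirrors ast_layout_expression::process_qualifier_constant(): every
   // expression must fold to the same constant, and the constant must be
   // at least 1 unless the qualifier is allowed to be zero.
   bool process_qualifier_constant(const char *name, unsigned *value,
                                   bool can_be_zero) const {
      const int min_value = can_be_zero ? 0 : 1;
      bool first_pass = true;
      *value = 0;
      for (const auto &expr : exprs) {
         const int v = expr(); // in Mesa: hir() + constant_expression_value()
         if (v < min_value) {
            fprintf(stderr, "%s layout qualifier is invalid (%d < %d)\n",
                    name, v, min_value);
            return false;
         }
         if (!first_pass && *value != unsigned(v)) {
            fprintf(stderr, "%s layout qualifier does not match previous "
                    "declaration (%u vs %d)\n", name, *value, v);
            return false;
         }
         first_pass = false;
         *value = unsigned(v);
      }
      return true;
   }

private:
   std::vector<std::function<int()>> exprs;
};

int main() {
   layout_expression vertices;            // e.g. two `layout(vertices = N) out;` lines
   vertices.append([] { return 4; });
   vertices.append([] { return 2 + 2; }); // a constant expression with the same value: OK
   unsigned value = 0;
   if (vertices.process_qualifier_constant("vertices", &value, false))
      printf("vertices = %u\n", value);
   return 0;
}

Deferring evaluation this way is what lets a qualifier carry any integral constant expression rather than a literal, and it centralizes the "does not match previous declaration" diagnostics in one place instead of scattering them across the merge paths.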
diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk index 6898fb0d492..59cc8577a6e 100644 --- a/src/glsl/Android.gen.mk +++ b/src/glsl/Android.gen.mk @@ -38,7 +38,8 @@ LOCAL_C_INCLUDES += \ $(MESA_TOP)/src/glsl/nir LOCAL_EXPORT_C_INCLUDE_DIRS += \ - $(intermediates)/nir + $(intermediates)/nir \ + $(MESA_TOP)/src/glsl/nir LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ $(LIBGLCPP_GENERATED_FILES) \ diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 957fd6b90ba..0c9fd75d206 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -22,10 +22,12 @@ NIR_FILES = \ nir/glsl_to_nir.h \ nir/glsl_types.cpp \ nir/glsl_types.h \ + nir/builtin_type_macros.h \ nir/nir.c \ nir/nir.h \ nir/nir_array.h \ nir/nir_builder.h \ + nir/nir_clone.c \ nir/nir_constant_expressions.h \ nir/nir_control_flow.c \ nir/nir_control_flow.h \ @@ -102,7 +104,6 @@ LIBGLSL_FILES = \ blob.c \ blob.h \ builtin_functions.cpp \ - builtin_type_macros.h \ builtin_types.cpp \ builtin_variables.cpp \ glsl_parser_extras.cpp \ diff --git a/src/glsl/ast.h b/src/glsl/ast.h index 1b75234d578..3bea63ea0ed 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -336,7 +336,7 @@ public: array_dimensions.push_tail(&dim->link); } - const bool is_single_dimension() + bool is_single_dimension() const { return this->array_dimensions.tail_pred->prev != NULL && this->array_dimensions.tail_pred->prev->is_head_sentinel(); @@ -350,6 +350,26 @@ public: exec_list array_dimensions; }; +class ast_layout_expression : public ast_node { +public: + ast_layout_expression(const struct YYLTYPE &locp, ast_expression *expr) + { + set_location(locp); + layout_const_expressions.push_tail(&expr->link); + } + + bool process_qualifier_constant(struct _mesa_glsl_parse_state *state, + const char *qual_indentifier, + unsigned *value, bool can_be_zero); + + void merge_qualifier(ast_layout_expression *l_expr) + { + layout_const_expressions.append_list(&l_expr->layout_const_expressions); + } + + exec_list layout_const_expressions; +}; + /** * C-style aggregate initialization class * @@ -558,7 +578,7 @@ struct ast_type_qualifier { unsigned precision:2; /** Geometry shader invocations for GL_ARB_gpu_shader5. */ - int invocations; + ast_layout_expression *invocations; /** * Location specified via GL_ARB_explicit_attrib_location layout @@ -566,20 +586,20 @@ struct ast_type_qualifier { * \note * This field is only valid if \c explicit_location is set. */ - int location; + ast_expression *location; /** * Index specified via GL_ARB_explicit_attrib_location layout * * \note * This field is only valid if \c explicit_index is set. */ - int index; + ast_expression *index; /** Maximum output vertices in GLSL 1.50 geometry shaders. */ - int max_vertices; + ast_layout_expression *max_vertices; /** Stream in GLSL 1.50 geometry shaders. */ - unsigned stream; + ast_expression *stream; /** * Input or output primitive type in GLSL 1.50 geometry shaders @@ -593,7 +613,7 @@ struct ast_type_qualifier { * \note * This field is only valid if \c explicit_binding is set. */ - int binding; + ast_expression *binding; /** * Offset specified via GL_ARB_shader_atomic_counter's "offset" @@ -602,14 +622,14 @@ struct ast_type_qualifier { * \note * This field is only valid if \c explicit_offset is set. */ - int offset; + ast_expression *offset; /** * Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}" * layout qualifier. Element i of this array is only valid if * flags.q.local_size & (1 << i) is set. 
*/ - int local_size[3]; + ast_layout_expression *local_size[3]; /** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */ GLenum vertex_spacing; @@ -621,7 +641,7 @@ struct ast_type_qualifier { bool point_mode; /** Tessellation control shader: number of output vertices */ - int vertices; + ast_layout_expression *vertices; /** * Image format specified with an ARB_shader_image_load_store @@ -752,7 +772,7 @@ public: class ast_fully_specified_type : public ast_node { public: virtual void print(void) const; - bool has_qualifiers() const; + bool has_qualifiers(_mesa_glsl_parse_state *state) const; ast_fully_specified_type() : qualifier(), specifier(NULL) { @@ -1093,17 +1113,13 @@ public: class ast_tcs_output_layout : public ast_node { public: - ast_tcs_output_layout(const struct YYLTYPE &locp, int vertices) - : vertices(vertices) + ast_tcs_output_layout(const struct YYLTYPE &locp) { set_location(locp); } virtual ir_rvalue *hir(exec_list *instructions, struct _mesa_glsl_parse_state *state); - -private: - const int vertices; }; @@ -1135,9 +1151,12 @@ private: class ast_cs_input_layout : public ast_node { public: - ast_cs_input_layout(const struct YYLTYPE &locp, const unsigned *local_size) + ast_cs_input_layout(const struct YYLTYPE &locp, + ast_layout_expression **local_size) { - memcpy(this->local_size, local_size, sizeof(this->local_size)); + for (int i = 0; i < 3; i++) { + this->local_size[i] = local_size[i]; + } set_location(locp); } @@ -1145,7 +1164,7 @@ public: struct _mesa_glsl_parse_state *state); private: - unsigned local_size[3]; + ast_layout_expression *local_size[3]; }; /*@}*/ diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 65db2618895..52881a4da7a 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2491,7 +2491,7 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state, "uniform block layout qualifiers row_major and " "column_major may not be applied to variables " "outside of uniform blocks"); - } else if (!type->is_matrix()) { + } else if (!type->without_array()->is_matrix()) { /* The OpenGL ES 3.0 conformance tests did not originally allow * matrix layout qualifiers on non-matrices. However, the OpenGL * 4.4 and OpenGL ES 3.0 (revision TBD) specifications were @@ -2502,39 +2502,88 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state, "uniform block layout qualifiers row_major and " "column_major applied to non-matrix types may " "be rejected by older compilers"); - } else if (type->is_record()) { - /* We allow 'layout(row_major)' on structure types because it's the only - * way to get row-major layouts on matrices contained in structures. 
- */ - _mesa_glsl_warning(loc, state, - "uniform block layout qualifiers row_major and " - "column_major applied to structure types is not " - "strictly conformant and may be rejected by other " - "compilers"); } } static bool -validate_binding_qualifier(struct _mesa_glsl_parse_state *state, +process_qualifier_constant(struct _mesa_glsl_parse_state *state, YYLTYPE *loc, - const glsl_type *type, - const ast_type_qualifier *qual) + const char *qual_indentifier, + ast_expression *const_expression, + unsigned *value) +{ + exec_list dummy_instructions; + + if (const_expression == NULL) { + *value = 0; + return true; + } + + ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); + + ir_constant *const const_int = ir->constant_expression_value(); + if (const_int == NULL || !const_int->type->is_integer()) { + _mesa_glsl_error(loc, state, "%s must be an integral constant " + "expression", qual_indentifier); + return false; + } + + if (const_int->value.i[0] < 0) { + _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)", + qual_indentifier, const_int->value.u[0]); + return false; + } + + /* If the location is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the location isn't const after all, or + * we are emitting unnecessary instructions. + */ + assert(dummy_instructions.is_empty()); + + *value = const_int->value.u[0]; + return true; +} + +static bool +validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state, + unsigned stream) +{ + if (stream >= state->ctx->Const.MaxVertexStreams) { + _mesa_glsl_error(loc, state, + "invalid stream specified %d is larger than " + "MAX_VERTEX_STREAMS - 1 (%d).", + stream, state->ctx->Const.MaxVertexStreams - 1); + return false; + } + + return true; +} + +static void +apply_explicit_binding(struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + ir_variable *var, + const glsl_type *type, + const ast_type_qualifier *qual) { if (!qual->flags.q.uniform && !qual->flags.q.buffer) { _mesa_glsl_error(loc, state, "the \"binding\" qualifier only applies to uniforms and " "shader storage buffer objects"); - return false; + return; } - if (qual->binding < 0) { - _mesa_glsl_error(loc, state, "binding values must be >= 0"); - return false; + unsigned qual_binding; + if (!process_qualifier_constant(state, loc, "binding", qual->binding, + &qual_binding)) { + return; } const struct gl_context *const ctx = state->ctx; unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1; - unsigned max_index = qual->binding + elements - 1; + unsigned max_index = qual_binding + elements - 1; const glsl_type *base_type = type->without_array(); if (base_type->is_interface()) { @@ -2550,11 +2599,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, */ if (qual->flags.q.uniform && max_index >= ctx->Const.MaxUniformBufferBindings) { - _mesa_glsl_error(loc, state, "layout(binding = %d) for %d UBOs exceeds " + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds " "the maximum number of UBO binding points (%d)", - qual->binding, elements, + qual_binding, elements, ctx->Const.MaxUniformBufferBindings); - return false; + return; } /* SSBOs. 
From page 67 of the GLSL 4.30 specification: @@ -2568,11 +2617,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, */ if (qual->flags.q.buffer && max_index >= ctx->Const.MaxShaderStorageBufferBindings) { - _mesa_glsl_error(loc, state, "layout(binding = %d) for %d SSBOs exceeds " + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds " "the maximum number of SSBO binding points (%d)", - qual->binding, elements, + qual_binding, elements, ctx->Const.MaxShaderStorageBufferBindings); - return false; + return; } } else if (base_type->is_sampler()) { /* Samplers. From page 63 of the GLSL 4.20 specification: @@ -2587,19 +2636,19 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, if (max_index >= limit) { _mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers " "exceeds the maximum number of texture image units " - "(%d)", qual->binding, elements, limit); + "(%u)", qual_binding, elements, limit); - return false; + return; } } else if (base_type->contains_atomic()) { assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS); - if (unsigned(qual->binding) >= ctx->Const.MaxAtomicBufferBindings) { + if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) { _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the " " maximum number of atomic counter buffer bindings" - "(%d)", qual->binding, + "(%u)", qual_binding, ctx->Const.MaxAtomicBufferBindings); - return false; + return; } } else if (state->is_version(420, 310) && base_type->is_image()) { assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); @@ -2607,17 +2656,20 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, _mesa_glsl_error(loc, state, "Image binding %d exceeds the " " maximum number of image units (%d)", max_index, ctx->Const.MaxImageUnits); - return false; + return; } } else { _mesa_glsl_error(loc, state, "the \"binding\" qualifier only applies to uniform " "blocks, opaque variables, or arrays thereof"); - return false; + return; } - return true; + var->data.explicit_binding = true; + var->data.binding = qual_binding; + + return; } @@ -2660,20 +2712,26 @@ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual, static void -validate_explicit_location(const struct ast_type_qualifier *qual, - ir_variable *var, - struct _mesa_glsl_parse_state *state, - YYLTYPE *loc) +apply_explicit_location(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) { bool fail = false; + unsigned qual_location; + if (!process_qualifier_constant(state, loc, "location", qual->location, + &qual_location)) { + return; + } + /* Checks for GL_ARB_explicit_uniform_location. 
*/ if (qual->flags.q.uniform) { if (!state->check_explicit_uniform_location_allowed(loc, var)) return; const struct gl_context *const ctx = state->ctx; - unsigned max_loc = qual->location + var->type->uniform_locations() - 1; + unsigned max_loc = qual_location + var->type->uniform_locations() - 1; if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) { _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s " @@ -2683,7 +2741,7 @@ validate_explicit_location(const struct ast_type_qualifier *qual, } var->data.explicit_location = true; - var->data.location = qual->location; + var->data.location = qual_location; return; } @@ -2768,30 +2826,40 @@ validate_explicit_location(const struct ast_type_qualifier *qual, switch (state->stage) { case MESA_SHADER_VERTEX: var->data.location = (var->data.mode == ir_var_shader_in) - ? (qual->location + VERT_ATTRIB_GENERIC0) - : (qual->location + VARYING_SLOT_VAR0); + ? (qual_location + VERT_ATTRIB_GENERIC0) + : (qual_location + VARYING_SLOT_VAR0); break; case MESA_SHADER_TESS_CTRL: case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: if (var->data.patch) - var->data.location = qual->location + VARYING_SLOT_PATCH0; + var->data.location = qual_location + VARYING_SLOT_PATCH0; else - var->data.location = qual->location + VARYING_SLOT_VAR0; + var->data.location = qual_location + VARYING_SLOT_VAR0; break; case MESA_SHADER_FRAGMENT: var->data.location = (var->data.mode == ir_var_shader_out) - ? (qual->location + FRAG_RESULT_DATA0) - : (qual->location + VARYING_SLOT_VAR0); + ? (qual_location + FRAG_RESULT_DATA0) + : (qual_location + VARYING_SLOT_VAR0); break; case MESA_SHADER_COMPUTE: assert(!"Unexpected shader type"); break; } - if (qual->flags.q.explicit_index) { + /* Check if index was set for the uniform instead of the function */ + if (qual->flags.q.explicit_index && qual->flags.q.subroutine) { + _mesa_glsl_error(loc, state, "an index qualifier can only be " + "used with subroutine functions"); + return; + } + + unsigned qual_index; + if (qual->flags.q.explicit_index && + process_qualifier_constant(state, loc, "index", qual->index, + &qual_index)) { /* From the GLSL 4.30 specification, section 4.4.2 (Output * Layout Qualifiers): * @@ -2801,12 +2869,12 @@ validate_explicit_location(const struct ast_type_qualifier *qual, * Older specifications don't mandate a behavior; we take * this as a clarification and always generate the error. */ - if (qual->index < 0 || qual->index > 1) { + if (qual_index > 1) { _mesa_glsl_error(loc, state, "explicit index may only be 0 or 1"); } else { var->data.explicit_index = true; - var->data.index = qual->index; + var->data.index = qual_index; } } } @@ -2939,6 +3007,221 @@ validate_array_dimensions(const glsl_type *t, } static void +apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) +{ + if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) { + + /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says: + * + * "Within any shader, the first redeclarations of gl_FragCoord + * must appear before any use of gl_FragCoord." + * + * Generate a compiler error if above condition is not met by the + * fragment shader. 
+ */ + ir_variable *earlier = state->symbols->get_variable("gl_FragCoord"); + if (earlier != NULL && + earlier->data.used && + !state->fs_redeclares_gl_fragcoord) { + _mesa_glsl_error(loc, state, + "gl_FragCoord used before its first redeclaration " + "in fragment shader"); + } + + /* Make sure all gl_FragCoord redeclarations specify the same layout + * qualifiers. + */ + if (is_conflicting_fragcoord_redeclaration(state, qual)) { + const char *const qual_string = + get_layout_qualifier_string(qual->flags.q.origin_upper_left, + qual->flags.q.pixel_center_integer); + + const char *const state_string = + get_layout_qualifier_string(state->fs_origin_upper_left, + state->fs_pixel_center_integer); + + _mesa_glsl_error(loc, state, + "gl_FragCoord redeclared with different layout " + "qualifiers (%s) and (%s) ", + state_string, + qual_string); + } + state->fs_origin_upper_left = qual->flags.q.origin_upper_left; + state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer; + state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = + !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer; + state->fs_redeclares_gl_fragcoord = + state->fs_origin_upper_left || + state->fs_pixel_center_integer || + state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; + } + + var->data.pixel_center_integer = qual->flags.q.pixel_center_integer; + var->data.origin_upper_left = qual->flags.q.origin_upper_left; + if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer) + && (strcmp(var->name, "gl_FragCoord") != 0)) { + const char *const qual_string = (qual->flags.q.origin_upper_left) + ? "origin_upper_left" : "pixel_center_integer"; + + _mesa_glsl_error(loc, state, + "layout qualifier `%s' can only be applied to " + "fragment shader input `gl_FragCoord'", + qual_string); + } + + if (qual->flags.q.explicit_location) { + apply_explicit_location(qual, var, state, loc); + } else if (qual->flags.q.explicit_index) { + if (!qual->flags.q.subroutine_def) + _mesa_glsl_error(loc, state, + "explicit index requires explicit location"); + } + + if (qual->flags.q.explicit_binding) { + apply_explicit_binding(state, loc, var, var->type, qual); + } + + if (state->stage == MESA_SHADER_GEOMETRY && + qual->flags.q.out && qual->flags.q.stream) { + unsigned qual_stream; + if (process_qualifier_constant(state, loc, "stream", qual->stream, + &qual_stream) && + validate_stream_qualifier(loc, state, qual_stream)) { + var->data.stream = qual_stream; + } + } + + if (var->type->contains_atomic()) { + if (var->data.mode == ir_var_uniform) { + if (var->data.explicit_binding) { + unsigned *offset = + &state->atomic_counter_offsets[var->data.binding]; + + if (*offset % ATOMIC_COUNTER_SIZE) + _mesa_glsl_error(loc, state, + "misaligned atomic counter offset"); + + var->data.atomic.offset = *offset; + *offset += var->type->atomic_size(); + + } else { + _mesa_glsl_error(loc, state, + "atomic counters require explicit binding point"); + } + } else if (var->data.mode != ir_var_function_in) { + _mesa_glsl_error(loc, state, "atomic counters may only be declared as " + "function parameters or uniform-qualified " + "global variables"); + } + } + + /* Is the 'layout' keyword used with parameters that allow relaxed checking. + * Many implementations of GL_ARB_fragment_coord_conventions_enable and some + * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable + * allowed the layout qualifier to be used with 'varying' and 'attribute'. 
+ * These extensions and all following extensions that add the 'layout' + * keyword have been modified to require the use of 'in' or 'out'. + * + * The following extension do not allow the deprecated keywords: + * + * GL_AMD_conservative_depth + * GL_ARB_conservative_depth + * GL_ARB_gpu_shader5 + * GL_ARB_separate_shader_objects + * GL_ARB_tessellation_shader + * GL_ARB_transform_feedback3 + * GL_ARB_uniform_buffer_object + * + * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5 + * allow layout with the deprecated keywords. + */ + const bool relaxed_layout_qualifier_checking = + state->ARB_fragment_coord_conventions_enable; + + const bool uses_deprecated_qualifier = qual->flags.q.attribute + || qual->flags.q.varying; + if (qual->has_layout() && uses_deprecated_qualifier) { + if (relaxed_layout_qualifier_checking) { + _mesa_glsl_warning(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } else { + _mesa_glsl_error(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } + } + + /* Layout qualifiers for gl_FragDepth, which are enabled by extension + * AMD_conservative_depth. + */ + int depth_layout_count = qual->flags.q.depth_any + + qual->flags.q.depth_greater + + qual->flags.q.depth_less + + qual->flags.q.depth_unchanged; + if (depth_layout_count > 0 + && !state->AMD_conservative_depth_enable + && !state->ARB_conservative_depth_enable) { + _mesa_glsl_error(loc, state, + "extension GL_AMD_conservative_depth or " + "GL_ARB_conservative_depth must be enabled " + "to use depth layout qualifiers"); + } else if (depth_layout_count > 0 + && strcmp(var->name, "gl_FragDepth") != 0) { + _mesa_glsl_error(loc, state, + "depth layout qualifiers can be applied only to " + "gl_FragDepth"); + } else if (depth_layout_count > 1 + && strcmp(var->name, "gl_FragDepth") == 0) { + _mesa_glsl_error(loc, state, + "at most one depth layout qualifier can be applied to " + "gl_FragDepth"); + } + if (qual->flags.q.depth_any) + var->data.depth_layout = ir_depth_layout_any; + else if (qual->flags.q.depth_greater) + var->data.depth_layout = ir_depth_layout_greater; + else if (qual->flags.q.depth_less) + var->data.depth_layout = ir_depth_layout_less; + else if (qual->flags.q.depth_unchanged) + var->data.depth_layout = ir_depth_layout_unchanged; + else + var->data.depth_layout = ir_depth_layout_none; + + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(loc, state, + "uniform and shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform or shader storage blocks, not " + "members"); + } + + if (qual->flags.q.row_major || qual->flags.q.column_major) { + validate_matrix_layout_for_type(state, loc, var->type, var); + } + + /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader + * Inputs): + * + * "Fragment shaders also allow the following layout qualifier on in only + * (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (qual->flags.q.early_fragment_tests) { + _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " + "valid in fragment shader input layout declaration."); + } +} + +static void apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, ir_variable *var, struct _mesa_glsl_parse_state *state, @@ -2992,11 +3275,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, 
select_gles_precision(qual->precision, var->type, state, loc); } - if (state->stage == MESA_SHADER_GEOMETRY && - qual->flags.q.out && qual->flags.q.stream) { - var->data.stream = qual->stream; - } - if (qual->flags.q.patch) var->data.patch = 1; @@ -3136,102 +3414,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode, state, loc); - var->data.pixel_center_integer = qual->flags.q.pixel_center_integer; - var->data.origin_upper_left = qual->flags.q.origin_upper_left; - if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer) - && (strcmp(var->name, "gl_FragCoord") != 0)) { - const char *const qual_string = (qual->flags.q.origin_upper_left) - ? "origin_upper_left" : "pixel_center_integer"; - - _mesa_glsl_error(loc, state, - "layout qualifier `%s' can only be applied to " - "fragment shader input `gl_FragCoord'", - qual_string); - } - - if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) { - - /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says: - * - * "Within any shader, the first redeclarations of gl_FragCoord - * must appear before any use of gl_FragCoord." - * - * Generate a compiler error if above condition is not met by the - * fragment shader. - */ - ir_variable *earlier = state->symbols->get_variable("gl_FragCoord"); - if (earlier != NULL && - earlier->data.used && - !state->fs_redeclares_gl_fragcoord) { - _mesa_glsl_error(loc, state, - "gl_FragCoord used before its first redeclaration " - "in fragment shader"); - } - - /* Make sure all gl_FragCoord redeclarations specify the same layout - * qualifiers. - */ - if (is_conflicting_fragcoord_redeclaration(state, qual)) { - const char *const qual_string = - get_layout_qualifier_string(qual->flags.q.origin_upper_left, - qual->flags.q.pixel_center_integer); - - const char *const state_string = - get_layout_qualifier_string(state->fs_origin_upper_left, - state->fs_pixel_center_integer); - - _mesa_glsl_error(loc, state, - "gl_FragCoord redeclared with different layout " - "qualifiers (%s) and (%s) ", - state_string, - qual_string); - } - state->fs_origin_upper_left = qual->flags.q.origin_upper_left; - state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer; - state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = - !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer; - state->fs_redeclares_gl_fragcoord = - state->fs_origin_upper_left || - state->fs_pixel_center_integer || - state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; - } - - if (qual->flags.q.explicit_location) { - validate_explicit_location(qual, var, state, loc); - } else if (qual->flags.q.explicit_index) { - _mesa_glsl_error(loc, state, "explicit index requires explicit location"); - } - - if (qual->flags.q.explicit_binding && - validate_binding_qualifier(state, loc, var->type, qual)) { - var->data.explicit_binding = true; - var->data.binding = qual->binding; - } - - if (var->type->contains_atomic()) { - if (var->data.mode == ir_var_uniform) { - if (var->data.explicit_binding) { - unsigned *offset = - &state->atomic_counter_offsets[var->data.binding]; - - if (*offset % ATOMIC_COUNTER_SIZE) - _mesa_glsl_error(loc, state, - "misaligned atomic counter offset"); - - var->data.atomic.offset = *offset; - *offset += var->type->atomic_size(); - - } else { - _mesa_glsl_error(loc, state, - "atomic counters require explicit binding point"); - } - } else if (var->data.mode != ir_var_function_in) { - _mesa_glsl_error(loc, 
state, "atomic counters may only be declared as " - "function parameters or uniform-qualified " - "global variables"); - } - } - /* Does the declaration use the deprecated 'attribute' or 'varying' * keywords? */ @@ -3267,114 +3449,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, "`out' or `varying' variables between shader stages"); } - - /* Is the 'layout' keyword used with parameters that allow relaxed checking. - * Many implementations of GL_ARB_fragment_coord_conventions_enable and some - * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable - * allowed the layout qualifier to be used with 'varying' and 'attribute'. - * These extensions and all following extensions that add the 'layout' - * keyword have been modified to require the use of 'in' or 'out'. - * - * The following extension do not allow the deprecated keywords: - * - * GL_AMD_conservative_depth - * GL_ARB_conservative_depth - * GL_ARB_gpu_shader5 - * GL_ARB_separate_shader_objects - * GL_ARB_tessellation_shader - * GL_ARB_transform_feedback3 - * GL_ARB_uniform_buffer_object - * - * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5 - * allow layout with the deprecated keywords. - */ - const bool relaxed_layout_qualifier_checking = - state->ARB_fragment_coord_conventions_enable; - - if (qual->has_layout() && uses_deprecated_qualifier) { - if (relaxed_layout_qualifier_checking) { - _mesa_glsl_warning(loc, state, - "`layout' qualifier may not be used with " - "`attribute' or `varying'"); - } else { - _mesa_glsl_error(loc, state, - "`layout' qualifier may not be used with " - "`attribute' or `varying'"); - } - } - - /* Layout qualifiers for gl_FragDepth, which are enabled by extension - * AMD_conservative_depth. - */ - int depth_layout_count = qual->flags.q.depth_any - + qual->flags.q.depth_greater - + qual->flags.q.depth_less - + qual->flags.q.depth_unchanged; - if (depth_layout_count > 0 - && !state->AMD_conservative_depth_enable - && !state->ARB_conservative_depth_enable) { - _mesa_glsl_error(loc, state, - "extension GL_AMD_conservative_depth or " - "GL_ARB_conservative_depth must be enabled " - "to use depth layout qualifiers"); - } else if (depth_layout_count > 0 - && strcmp(var->name, "gl_FragDepth") != 0) { - _mesa_glsl_error(loc, state, - "depth layout qualifiers can be applied only to " - "gl_FragDepth"); - } else if (depth_layout_count > 1 - && strcmp(var->name, "gl_FragDepth") == 0) { - _mesa_glsl_error(loc, state, - "at most one depth layout qualifier can be applied to " - "gl_FragDepth"); - } - if (qual->flags.q.depth_any) - var->data.depth_layout = ir_depth_layout_any; - else if (qual->flags.q.depth_greater) - var->data.depth_layout = ir_depth_layout_greater; - else if (qual->flags.q.depth_less) - var->data.depth_layout = ir_depth_layout_less; - else if (qual->flags.q.depth_unchanged) - var->data.depth_layout = ir_depth_layout_unchanged; - else - var->data.depth_layout = ir_depth_layout_none; - - if (qual->flags.q.std140 || - qual->flags.q.std430 || - qual->flags.q.packed || - qual->flags.q.shared) { - _mesa_glsl_error(loc, state, - "uniform and shader storage block layout qualifiers " - "std140, std430, packed, and shared can only be " - "applied to uniform or shader storage blocks, not " - "members"); - } - if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) { _mesa_glsl_error(loc, state, "the shared storage qualifiers can only be used with " "compute shaders"); } - if (qual->flags.q.row_major || qual->flags.q.column_major) { 
- validate_matrix_layout_for_type(state, loc, var->type, var); - } - apply_image_qualifier_to_variable(qual, var, state, loc); - - /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader - * Inputs): - * - * "Fragment shaders also allow the following layout qualifier on in only - * (not with variable declarations) - * layout-qualifier-id - * early_fragment_tests - * [...]" - */ - if (qual->flags.q.early_fragment_tests) { - _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " - "valid in fragment shader input layout declaration."); - } } /** @@ -3798,7 +3879,17 @@ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state, unsigned num_vertices = 0; if (state->tcs_output_vertices_specified) { - num_vertices = state->out_qualifier->vertices; + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", + &num_vertices, false)) { + return; + } + + if (num_vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", num_vertices); + return; + } } if (!var->type->is_array() && !var->data.patch) { @@ -4032,9 +4123,18 @@ ast_declarator_list::hir(exec_list *instructions, */ if (decl_type && decl_type->contains_atomic()) { if (type->qualifier.flags.q.explicit_binding && - type->qualifier.flags.q.explicit_offset) - state->atomic_counter_offsets[type->qualifier.binding] = - type->qualifier.offset; + type->qualifier.flags.q.explicit_offset) { + unsigned qual_binding; + unsigned qual_offset; + if (process_qualifier_constant(state, &loc, "binding", + type->qualifier.binding, + &qual_binding) + && process_qualifier_constant(state, &loc, "offset", + type->qualifier.offset, + &qual_offset)) { + state->atomic_counter_offsets[qual_binding] = qual_offset; + } + } } if (this->declarations.is_empty()) { @@ -4188,6 +4288,8 @@ ast_declarator_list::hir(exec_list *instructions, apply_type_qualifier_to_variable(& this->type->qualifier, var, state, & loc, false); + apply_layout_qualifier_to_variable(&this->type->qualifier, var, state, + &loc); if (this->type->qualifier.flags.q.invariant) { if (!is_varying_var(var, state->stage)) { @@ -4983,7 +5085,7 @@ ast_function::hir(exec_list *instructions, /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec: * "No qualifier is allowed on the return type of a function." 
*/ - if (this->return_type->has_qualifiers()) { + if (this->return_type->has_qualifiers(state)) { YYLTYPE loc = this->get_location(); _mesa_glsl_error(& loc, state, "function `%s' return type has qualifiers", name); @@ -5115,6 +5217,27 @@ ast_function::hir(exec_list *instructions, if (this->return_type->qualifier.flags.q.subroutine_def) { int idx; + if (this->return_type->qualifier.flags.q.explicit_index) { + unsigned qual_index; + if (process_qualifier_constant(state, &loc, "index", + this->return_type->qualifier.index, + &qual_index)) { + if (!state->has_explicit_uniform_location()) { + _mesa_glsl_error(&loc, state, "subroutine index requires " + "GL_ARB_explicit_uniform_location or " + "GLSL 4.30"); + } else if (qual_index >= MAX_SUBROUTINES) { + _mesa_glsl_error(&loc, state, + "invalid subroutine index (%d) index must " + "be a number between 0 and " + "GL_MAX_SUBROUTINES - 1 (%d)", qual_index, + MAX_SUBROUTINES - 1); + } else { + f->subroutine_index = qual_index; + } + } + } + f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length(); f->subroutine_types = ralloc_array(state, const struct glsl_type *, f->num_subroutine_types); @@ -6046,27 +6169,19 @@ ast_type_specifier::hir(exec_list *instructions, * stored in \c *fields_ret. */ unsigned -ast_process_structure_or_interface_block(exec_list *instructions, - struct _mesa_glsl_parse_state *state, - exec_list *declarations, - YYLTYPE &loc, - glsl_struct_field **fields_ret, - bool is_interface, - enum glsl_matrix_layout matrix_layout, - bool allow_reserved_names, - ir_variable_mode var_mode, - ast_type_qualifier *layout) +ast_process_struct_or_iface_block_members(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + exec_list *declarations, + glsl_struct_field **fields_ret, + bool is_interface, + enum glsl_matrix_layout matrix_layout, + bool allow_reserved_names, + ir_variable_mode var_mode, + ast_type_qualifier *layout, + unsigned block_stream) { unsigned decl_count = 0; - /* For blocks that accept memory qualifiers (i.e. shader storage), verify - * that we don't have incompatible qualifiers - */ - if (layout && layout->flags.q.read_only && layout->flags.q.write_only) { - _mesa_glsl_error(&loc, state, - "Interface block sets both readonly and writeonly"); - } - /* Make an initial pass over the list of fields to determine how * many there are. Each element in this list is an ast_declarator_list. * This means that we actually need to count the number of elements in the @@ -6087,6 +6202,7 @@ ast_process_structure_or_interface_block(exec_list *instructions, unsigned i = 0; foreach_list_typed (ast_declarator_list, decl_list, link, declarations) { const char *type_name; + YYLTYPE loc = decl_list->get_location(); decl_list->type->specifier->hir(instructions, state); @@ -6101,74 +6217,120 @@ ast_process_structure_or_interface_block(exec_list *instructions, const glsl_type *decl_type = decl_list->type->glsl_type(& type_name, state); - foreach_list_typed (ast_declaration, decl, link, - &decl_list->declarations) { - if (!allow_reserved_names) - validate_identifier(decl->identifier, loc, state); + const struct ast_type_qualifier *const qual = + &decl_list->type->qualifier; - /* From section 4.3.9 of the GLSL 4.40 spec: - * - * "[In interface blocks] opaque types are not allowed." + /* From section 4.3.9 of the GLSL 4.40 spec: + * + * "[In interface blocks] opaque types are not allowed." + * + * It should be impossible for decl_type to be NULL here. 
Cases that + * might naturally lead to decl_type being NULL, especially for the + * is_interface case, will have resulted in compilation having + * already halted due to a syntax error. + */ + assert(decl_type); + + if (is_interface && decl_type->contains_opaque()) { + _mesa_glsl_error(&loc, state, + "uniform/buffer in non-default interface block contains " + "opaque variable"); + } + + if (decl_type->contains_atomic()) { + /* From section 4.1.7.3 of the GLSL 4.40 spec: * - * It should be impossible for decl_type to be NULL here. Cases that - * might naturally lead to decl_type being NULL, especially for the - * is_interface case, will have resulted in compilation having - * already halted due to a syntax error. + * "Members of structures cannot be declared as atomic counter + * types." */ - assert(decl_type); + _mesa_glsl_error(&loc, state, "atomic counter in structure, " + "shader storage block or uniform block"); + } - if (is_interface && decl_type->contains_opaque()) { - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, - "uniform/buffer in non-default interface block contains " - "opaque variable"); - } + if (decl_type->contains_image()) { + /* FINISHME: Same problem as with atomic counters. + * FINISHME: Request clarification from Khronos and add + * FINISHME: spec quotation here. + */ + _mesa_glsl_error(&loc, state, + "image in structure, shader storage block or " + "uniform block"); + } - if (decl_type->contains_atomic()) { - /* From section 4.1.7.3 of the GLSL 4.40 spec: - * - * "Members of structures cannot be declared as atomic counter - * types." - */ - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, "atomic counter in structure, " - "shader storage block or uniform block"); - } + if (qual->flags.q.explicit_binding) { + _mesa_glsl_error(&loc, state, + "binding layout qualifier cannot be applied " + "to struct or interface block members"); + } - if (decl_type->contains_image()) { - /* FINISHME: Same problem as with atomic counters. - * FINISHME: Request clarification from Khronos and add - * FINISHME: spec quotation here. - */ - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, - "image in structure, shader storage block or " - "uniform block"); + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(&loc, state, + "uniform/shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform/shader storage blocks, not " + "members"); + } + + if (qual->flags.q.constant) { + _mesa_glsl_error(&loc, state, + "const storage qualifier cannot be applied " + "to struct or interface block members"); + } + + /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec: + * + * "A block member may be declared with a stream identifier, but + * the specified stream must match the stream associated with the + * containing block." 
+ */ + if (qual->flags.q.explicit_stream) { + unsigned qual_stream; + if (process_qualifier_constant(state, &loc, "stream", + qual->stream, &qual_stream) && + qual_stream != block_stream) { + _mesa_glsl_error(&loc, state, "stream layout qualifier on " + "interface block member does not match " + "the interface block (%d vs %d)", qual->stream, + block_stream); } + } - const struct ast_type_qualifier *const qual = - & decl_list->type->qualifier; + if (qual->flags.q.uniform && qual->has_interpolation()) { + _mesa_glsl_error(&loc, state, + "interpolation qualifiers cannot be used " + "with uniform interface blocks"); + } - if (qual->flags.q.explicit_binding) - validate_binding_qualifier(state, &loc, decl_type, qual); + if ((qual->flags.q.uniform || !is_interface) && + qual->has_auxiliary_storage()) { + _mesa_glsl_error(&loc, state, + "auxiliary storage qualifiers cannot be used " + "in uniform blocks or structures."); + } - if (qual->flags.q.std140 || - qual->flags.q.std430 || - qual->flags.q.packed || - qual->flags.q.shared) { + if (qual->flags.q.row_major || qual->flags.q.column_major) { + if (!qual->flags.q.uniform && !qual->flags.q.buffer) { _mesa_glsl_error(&loc, state, - "uniform/shader storage block layout qualifiers " - "std140, std430, packed, and shared can only be " - "applied to uniform/shader storage blocks, not " - "members"); - } + "row_major and column_major can only be " + "applied to interface blocks"); + } else + validate_matrix_layout_for_type(state, &loc, decl_type, NULL); + } - if (qual->flags.q.constant) { - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, - "const storage qualifier cannot be applied " - "to struct or interface block members"); - } + if (qual->flags.q.read_only && qual->flags.q.write_only) { + _mesa_glsl_error(&loc, state, "buffer variable can't be both " + "readonly and writeonly."); + } + + foreach_list_typed (ast_declaration, decl, link, + &decl_list->declarations) { + YYLTYPE loc = decl->get_location(); + + if (!allow_reserved_names) + validate_identifier(decl->identifier, loc, state); const struct glsl_type *field_type = process_array_type(&loc, decl_type, decl->array_specifier, state); @@ -6183,42 +6345,6 @@ ast_process_structure_or_interface_block(exec_list *instructions, fields[i].patch = qual->flags.q.patch ? 1 : 0; fields[i].precision = qual->precision; - /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec: - * - * "A block member may be declared with a stream identifier, but - * the specified stream must match the stream associated with the - * containing block." 
- */ - if (qual->flags.q.explicit_stream && - qual->stream != layout->stream) { - _mesa_glsl_error(&loc, state, "stream layout qualifier on " - "interface block member `%s' does not match " - "the interface block (%d vs %d)", - fields[i].name, qual->stream, layout->stream); - } - - if (qual->flags.q.row_major || qual->flags.q.column_major) { - if (!qual->flags.q.uniform && !qual->flags.q.buffer) { - _mesa_glsl_error(&loc, state, - "row_major and column_major can only be " - "applied to interface blocks"); - } else - validate_matrix_layout_for_type(state, &loc, field_type, NULL); - } - - if (qual->flags.q.uniform && qual->has_interpolation()) { - _mesa_glsl_error(&loc, state, - "interpolation qualifiers cannot be used " - "with uniform interface blocks"); - } - - if ((qual->flags.q.uniform || !is_interface) && - qual->has_auxiliary_storage()) { - _mesa_glsl_error(&loc, state, - "auxiliary storage qualifiers cannot be used " - "in uniform blocks or structures."); - } - /* Propogate row- / column-major information down the fields of the * structure or interface block. Structures need this data because * the structure may contain a structure that contains ... a matrix @@ -6248,29 +6374,20 @@ ast_process_structure_or_interface_block(exec_list *instructions, * be defined inside shader storage buffer objects */ if (layout && var_mode == ir_var_shader_storage) { - if (qual->flags.q.read_only && qual->flags.q.write_only) { - _mesa_glsl_error(&loc, state, - "buffer variable `%s' can't be " - "readonly and writeonly.", fields[i].name); - } - /* For readonly and writeonly qualifiers the field definition, * if set, overwrites the layout qualifier. */ - bool read_only = layout->flags.q.read_only; - bool write_only = layout->flags.q.write_only; - if (qual->flags.q.read_only) { - read_only = true; - write_only = false; + fields[i].image_read_only = true; + fields[i].image_write_only = false; } else if (qual->flags.q.write_only) { - read_only = false; - write_only = true; + fields[i].image_read_only = false; + fields[i].image_write_only = true; + } else { + fields[i].image_read_only = layout->flags.q.read_only; + fields[i].image_write_only = layout->flags.q.write_only; } - fields[i].image_read_only = read_only; - fields[i].image_write_only = write_only; - /* For other qualifiers, we set the flag if either the layout * qualifier or the field qualifier are set */ @@ -6328,16 +6445,16 @@ ast_struct_specifier::hir(exec_list *instructions, glsl_struct_field *fields; unsigned decl_count = - ast_process_structure_or_interface_block(instructions, - state, - &this->declarations, - loc, - &fields, - false, - GLSL_MATRIX_LAYOUT_INHERITED, - false /* allow_reserved_names */, - ir_var_auto, - NULL); + ast_process_struct_or_iface_block_members(instructions, + state, + &this->declarations, + &fields, + false, + GLSL_MATRIX_LAYOUT_INHERITED, + false /* allow_reserved_names */, + ir_var_auto, + NULL, + 0 /* for interface only */); validate_identifier(this->name, loc, state); @@ -6483,17 +6600,36 @@ ast_interface_block::hir(exec_list *instructions, */ state->struct_specifier_depth++; + /* For blocks that accept memory qualifiers (i.e. 
shader storage), verify + * that we don't have incompatible qualifiers + */ + if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) { + _mesa_glsl_error(&loc, state, + "Interface block sets both readonly and writeonly"); + } + + unsigned qual_stream; + if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream, + &qual_stream) || + !validate_stream_qualifier(&loc, state, qual_stream)) { + /* If the stream qualifier is invalid it doesn't make sense to continue + * on and try to compare stream layouts on member variables against it + * so just return early. + */ + return NULL; + } + unsigned int num_variables = - ast_process_structure_or_interface_block(&declared_variables, - state, - &this->declarations, - loc, - &fields, - true, - matrix_layout, - redeclaring_per_vertex, - var_mode, - &this->layout); + ast_process_struct_or_iface_block_members(&declared_variables, + state, + &this->declarations, + &fields, + true, + matrix_layout, + redeclaring_per_vertex, + var_mode, + &this->layout, + qual_stream); state->struct_specifier_depth--; @@ -6604,6 +6740,8 @@ ast_interface_block::hir(exec_list *instructions, earlier_per_vertex->fields.structure[j].sample; fields[i].patch = earlier_per_vertex->fields.structure[j].patch; + fields[i].precision = + earlier_per_vertex->fields.structure[j].precision; } } @@ -6633,8 +6771,6 @@ ast_interface_block::hir(exec_list *instructions, num_variables, packing, this->block_name); - if (this->layout.flags.q.explicit_binding) - validate_binding_qualifier(state, &loc, block_type, &this->layout); if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) { YYLTYPE loc = this->get_location(); @@ -6765,10 +6901,6 @@ ast_interface_block::hir(exec_list *instructions, "not allowed"); } - if (this->layout.flags.q.explicit_binding) - validate_binding_qualifier(state, &loc, block_array_type, - &this->layout); - var = new(state) ir_variable(block_array_type, this->instance_name, var_mode); @@ -6830,14 +6962,12 @@ ast_interface_block::hir(exec_list *instructions, earlier->reinit_interface_type(block_type); delete var; } else { - /* Propagate the "binding" keyword into this UBO's fields; - * the UBO declaration itself doesn't get an ir_variable unless it - * has an instance name. This is ugly. - */ - var->data.explicit_binding = this->layout.flags.q.explicit_binding; - var->data.binding = this->layout.binding; + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, var->type, + &this->layout); + } - var->data.stream = this->layout.stream; + var->data.stream = qual_stream; state->symbols->add_variable(var); instructions->push_tail(var); @@ -6857,7 +6987,7 @@ ast_interface_block::hir(exec_list *instructions, var->data.centroid = fields[i].centroid; var->data.sample = fields[i].sample; var->data.patch = fields[i].patch; - var->data.stream = this->layout.stream; + var->data.stream = qual_stream; var->init_interface_type(block_type); if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) @@ -6914,8 +7044,10 @@ ast_interface_block::hir(exec_list *instructions, * The UBO declaration itself doesn't get an ir_variable unless it * has an instance name. This is ugly. 
*/ - var->data.explicit_binding = this->layout.flags.q.explicit_binding; - var->data.binding = this->layout.binding; + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, + var->get_interface_type(), &this->layout); + } if (var->type->is_unsized_array()) { if (var->is_in_shader_storage_block()) { @@ -6997,22 +7129,18 @@ ast_tcs_output_layout::hir(exec_list *instructions, { YYLTYPE loc = this->get_location(); - /* If any tessellation control output layout declaration preceded this - * one, make sure it was consistent with this one. - */ - if (state->tcs_output_vertices_specified && - state->out_qualifier->vertices != this->vertices) { - _mesa_glsl_error(&loc, state, - "tessellation control shader output layout does not " - "match previous declaration"); - return NULL; + unsigned num_vertices; + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &num_vertices, + false)) { + /* return here to stop cascading incorrect error messages */ + return NULL; } /* If any shader outputs occurred before this declaration and specified an * array size, make sure the size they specified is consistent with the * primitive type. */ - unsigned num_vertices = this->vertices; if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) { _mesa_glsl_error(&loc, state, "this tessellation control shader output layout " @@ -7120,20 +7248,6 @@ ast_cs_input_layout::hir(exec_list *instructions, { YYLTYPE loc = this->get_location(); - /* If any compute input layout declaration preceded this one, make sure it - * was consistent with this one. - */ - if (state->cs_input_local_size_specified) { - for (int i = 0; i < 3; i++) { - if (state->cs_input_local_size[i] != this->local_size[i]) { - _mesa_glsl_error(&loc, state, - "compute shader input layout does not match" - " previous declaration"); - return NULL; - } - } - } - /* From the ARB_compute_shader specification: * * If the local size of the shader in any dimension is greater @@ -7146,15 +7260,30 @@ ast_cs_input_layout::hir(exec_list *instructions, * report it at compile time as well. */ GLuint64 total_invocations = 1; + unsigned qual_local_size[3]; for (int i = 0; i < 3; i++) { - if (this->local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { + + char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c", + 'x' + i); + /* Infer a local_size of 1 for unspecified dimensions */ + if (this->local_size[i] == NULL) { + qual_local_size[i] = 1; + } else if (!this->local_size[i]-> + process_qualifier_constant(state, local_size_str, + &qual_local_size[i], false)) { + ralloc_free(local_size_str); + return NULL; + } + ralloc_free(local_size_str); + + if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { _mesa_glsl_error(&loc, state, "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE" " (%d)", 'x' + i, state->ctx->Const.MaxComputeWorkGroupSize[i]); break; } - total_invocations *= this->local_size[i]; + total_invocations *= qual_local_size[i]; if (total_invocations > state->ctx->Const.MaxComputeWorkGroupInvocations) { _mesa_glsl_error(&loc, state, @@ -7165,9 +7294,23 @@ ast_cs_input_layout::hir(exec_list *instructions, } } + /* If any compute input layout declaration preceded this one, make sure it + * was consistent with this one. 
+ */ + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) { + if (state->cs_input_local_size[i] != qual_local_size[i]) { + _mesa_glsl_error(&loc, state, + "compute shader input layout does not match" + " previous declaration"); + return NULL; + } + } + } + state->cs_input_local_size_specified = true; for (int i = 0; i < 3; i++) - state->cs_input_local_size[i] = this->local_size[i]; + state->cs_input_local_size[i] = qual_local_size[i]; /* We may now declare the built-in constant gl_WorkGroupSize (see * builtin_variable_generator::generate_constants() for why we didn't @@ -7182,7 +7325,7 @@ ast_cs_input_layout::hir(exec_list *instructions, ir_constant_data data; memset(&data, 0, sizeof(data)); for (int i = 0; i < 3; i++) - data.u[i] = this->local_size[i]; + data.u[i] = qual_local_size[i]; var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data); var->constant_initializer = new(var) ir_constant(glsl_type::uvec3_type, &data); @@ -7198,6 +7341,8 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, { bool gl_FragColor_assigned = false; bool gl_FragData_assigned = false; + bool gl_FragSecondaryColor_assigned = false; + bool gl_FragSecondaryData_assigned = false; bool user_defined_fs_output_assigned = false; ir_variable *user_defined_fs_output = NULL; @@ -7215,6 +7360,10 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, gl_FragColor_assigned = true; else if (strcmp(var->name, "gl_FragData") == 0) gl_FragData_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0) + gl_FragSecondaryColor_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0) + gl_FragSecondaryData_assigned = true; else if (!is_gl_identifier(var->name)) { if (state->stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out) { @@ -7246,11 +7395,29 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, _mesa_glsl_error(&loc, state, "fragment shader writes to both " "`gl_FragColor' and `%s'", user_defined_fs_output->name); + } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragSecondaryColorEXT' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragData' and" + " `gl_FragSecondaryColorEXT'"); } else if (gl_FragData_assigned && user_defined_fs_output_assigned) { _mesa_glsl_error(&loc, state, "fragment shader writes to both " "`gl_FragData' and `%s'", user_defined_fs_output->name); } + + if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) && + !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(&loc, state, + "Dual source blending requires EXT_blend_func_extended"); + } } diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp index 79134c19893..03ed4dcfa2a 100644 --- a/src/glsl/ast_type.cpp +++ b/src/glsl/ast_type.cpp @@ -38,13 +38,16 @@ ast_type_specifier::print(void) const } bool -ast_fully_specified_type::has_qualifiers() const +ast_fully_specified_type::has_qualifiers(_mesa_glsl_parse_state *state) const { /* 'subroutine' isnt a real qualifier. 
*/ ast_type_qualifier subroutine_only; subroutine_only.flags.i = 0; subroutine_only.flags.q.subroutine = 1; subroutine_only.flags.q.subroutine_def = 1; + if (state->has_explicit_uniform_location()) { + subroutine_only.flags.q.explicit_index = 1; + } return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0; } @@ -169,41 +172,32 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, } if (q.flags.q.max_vertices) { - if (this->flags.q.max_vertices && this->max_vertices != q.max_vertices) { + if (this->max_vertices) { + this->max_vertices->merge_qualifier(q.max_vertices); + } else { + this->max_vertices = q.max_vertices; + } + } + + if (q.flags.q.subroutine_def) { + if (this->flags.q.subroutine_def) { _mesa_glsl_error(loc, state, - "geometry shader set conflicting max_vertices " - "(%d and %d)", this->max_vertices, q.max_vertices); - return false; + "conflicting subroutine qualifiers used"); + } else { + this->subroutine_list = q.subroutine_list; } - this->max_vertices = q.max_vertices; } if (q.flags.q.invocations) { - if (this->flags.q.invocations && this->invocations != q.invocations) { - _mesa_glsl_error(loc, state, - "geometry shader set conflicting invocations " - "(%d and %d)", this->invocations, q.invocations); - return false; + if (this->invocations) { + this->invocations->merge_qualifier(q.invocations); + } else { + this->invocations = q.invocations; } - this->invocations = q.invocations; } if (state->stage == MESA_SHADER_GEOMETRY && state->has_explicit_attrib_stream()) { - if (q.flags.q.stream && q.stream >= state->ctx->Const.MaxVertexStreams) { - _mesa_glsl_error(loc, state, - "`stream' value is larger than MAX_VERTEX_STREAMS - 1 " - "(%d > %d)", - q.stream, state->ctx->Const.MaxVertexStreams - 1); - } - if (this->flags.q.explicit_stream && - this->stream >= state->ctx->Const.MaxVertexStreams) { - _mesa_glsl_error(loc, state, - "`stream' value is larger than MAX_VERTEX_STREAMS - 1 " - "(%d > %d)", - this->stream, state->ctx->Const.MaxVertexStreams - 1); - } - if (!this->flags.q.explicit_stream) { if (q.flags.q.stream) { this->flags.q.stream = 1; @@ -222,14 +216,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, } if (q.flags.q.vertices) { - if (this->flags.q.vertices && this->vertices != q.vertices) { - _mesa_glsl_error(loc, state, - "tessellation control shader set conflicting " - "vertices (%d and %d)", - this->vertices, q.vertices); - return false; + if (this->vertices) { + this->vertices->merge_qualifier(q.vertices); + } else { + this->vertices = q.vertices; } - this->vertices = q.vertices; } if (q.flags.q.vertex_spacing) { @@ -266,15 +257,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, for (int i = 0; i < 3; i++) { if (q.flags.q.local_size & (1 << i)) { - if ((this->flags.q.local_size & (1 << i)) && - this->local_size[i] != q.local_size[i]) { - _mesa_glsl_error(loc, state, - "compute shader set conflicting values for " - "local_size_%c (%d and %d)", 'x' + i, - this->local_size[i], q.local_size[i]); - return false; + if (this->local_size[i]) { + this->local_size[i]->merge_qualifier(q.local_size[i]); + } else { + this->local_size[i] = q.local_size[i]; } - this->local_size[i] = q.local_size[i]; } } @@ -313,7 +300,7 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc, const bool r = this->merge_qualifier(loc, state, q); if (state->stage == MESA_SHADER_TESS_CTRL) { - node = new(mem_ctx) ast_tcs_output_layout(*loc, q.vertices); + node = new(mem_ctx) ast_tcs_output_layout(*loc); } return r; @@ -417,15 +404,13 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE 
*loc, state->in_qualifier->prim_type = q.prim_type; } - if (this->flags.q.invocations && - q.flags.q.invocations && - this->invocations != q.invocations) { - _mesa_glsl_error(loc, state, - "conflicting invocations counts specified"); - return false; - } else if (q.flags.q.invocations) { + if (q.flags.q.invocations) { this->flags.q.invocations = 1; - this->invocations = q.invocations; + if (this->invocations) { + this->invocations->merge_qualifier(q.invocations); + } else { + this->invocations = q.invocations; + } } if (q.flags.q.early_fragment_tests) { @@ -468,15 +453,67 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, if (create_gs_ast) { node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type); } else if (create_cs_ast) { - /* Infer a local_size of 1 for every unspecified dimension */ - unsigned local_size[3]; - for (int i = 0; i < 3; i++) { - if (q.flags.q.local_size & (1 << i)) - local_size[i] = q.local_size[i]; - else - local_size[i] = 1; + node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size); + } + + return true; +} + +bool +ast_layout_expression::process_qualifier_constant(struct _mesa_glsl_parse_state *state, + const char *qual_indentifier, + unsigned *value, + bool can_be_zero) +{ + int min_value = 0; + bool first_pass = true; + *value = 0; + + if (!can_be_zero) + min_value = 1; + + for (exec_node *node = layout_const_expressions.head; + !node->is_tail_sentinel(); node = node->next) { + + exec_list dummy_instructions; + ast_node *const_expression = exec_node_data(ast_node, node, link); + + ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); + + ir_constant *const const_int = ir->constant_expression_value(); + if (const_int == NULL || !const_int->type->is_integer()) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s must be an integral constant " + "expression", qual_indentifier); + return false; + } + + if (const_int->value.i[0] < min_value) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s layout qualifier is invalid " + "(%d < %d)", qual_indentifier, + const_int->value.i[0], min_value); + return false; } - node = new(mem_ctx) ast_cs_input_layout(*loc, local_size); + + if (!first_pass && *value != const_int->value.u[0]) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s layout qualifier does not " + "match previous declaration (%d vs %d)", + qual_indentifier, *value, const_int->value.i[0]); + return false; + } else { + first_pass = false; + *value = const_int->value.u[0]; + } + + /* If the location is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the location isn't const after all, or + * we are emitting unnecessary instructions. 
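+ *
+ * As an illustrative sketch (not part of the patch): after
+ * merge_qualifier() has combined declarations such as
+ *
+ *    layout(max_vertices = 16) out;
+ *    layout(triangle_strip, max_vertices = 16) out;
+ *
+ * every max_vertices expression sits in one layout_const_expressions
+ * list, and the loop above folds each entry and checks that it
+ * matches the others; a third declaration with a different constant
+ * would trigger the mismatch error above.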
+ */ + assert(dummy_instructions.is_empty()); } return true; diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 13494446b59..881ee2b6b55 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -290,6 +290,20 @@ texture_multisample_array(const _mesa_glsl_parse_state *state) } static bool +texture_samples_identical(const _mesa_glsl_parse_state *state) +{ + return texture_multisample(state) && + state->EXT_shader_samples_identical_enable; +} + +static bool +texture_samples_identical_array(const _mesa_glsl_parse_state *state) +{ + return texture_multisample_array(state) && + state->EXT_shader_samples_identical_enable; +} + +static bool fs_texture_cube_map_array(const _mesa_glsl_parse_state *state) { return state->stage == MESA_SHADER_FRAGMENT && @@ -724,6 +738,7 @@ private: BA2(textureQueryLod); B1(textureQueryLevels); + BA2(textureSamplesIdentical); B1(dFdx); B1(dFdy); B1(fwidth); @@ -2210,6 +2225,16 @@ builtin_builder::create_builtins() NULL); + add_function("textureSamplesIdenticalEXT", + _textureSamplesIdentical(texture_samples_identical, glsl_type::sampler2DMS_type, glsl_type::ivec2_type), + _textureSamplesIdentical(texture_samples_identical, glsl_type::isampler2DMS_type, glsl_type::ivec2_type), + _textureSamplesIdentical(texture_samples_identical, glsl_type::usampler2DMS_type, glsl_type::ivec2_type), + + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), + NULL); + add_function("texture1D", _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), @@ -3573,7 +3598,16 @@ builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type ir_constant_data infinities; for (int i = 0; i < type->vector_elements; i++) { - infinities.f[i] = INFINITY; + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + infinities.f[i] = INFINITY; + break; + case GLSL_TYPE_DOUBLE: + infinities.d[i] = INFINITY; + break; + default: + unreachable("unknown type"); + } } body.emit(ret(equal(abs(x), imm(type, infinities)))); @@ -4675,6 +4709,25 @@ builtin_builder::_textureQueryLevels(const glsl_type *sampler_type) return sig; } +ir_function_signature * +builtin_builder::_textureSamplesIdentical(builtin_available_predicate avail, + const glsl_type *sampler_type, + const glsl_type *coord_type) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + ir_variable *P = in_var(coord_type, "P"); + const glsl_type *return_type = glsl_type::bool_type; + MAKE_SIG(return_type, avail, 2, s, P); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_samples_identical); + tex->coordinate = var_ref(P); + tex->set_sampler(var_ref(s), return_type); + + body.emit(ret(tex)); + + return sig; +} + UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives) UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control) UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control) @@ -5243,8 +5296,8 @@ builtin_builder::_image_size_prototype(const glsl_type *image_type, ir_function_signature * builtin_builder::_image_samples_prototype(const glsl_type *image_type, - unsigned num_arguments, - unsigned flags) + unsigned /* num_arguments */, + unsigned /* flags 
*/) { ir_variable *image = in_var(image_type, "image"); ir_function_signature *sig = diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp index b06c1bc5c12..e8eab808a19 100644 --- a/src/glsl/builtin_variables.cpp +++ b/src/glsl/builtin_variables.cpp @@ -327,6 +327,7 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type, this->fields[this->num_fields].centroid = 0; this->fields[this->num_fields].sample = 0; this->fields[this->num_fields].patch = 0; + this->fields[this->num_fields].precision = GLSL_PRECISION_NONE; this->num_fields++; } @@ -376,6 +377,11 @@ private: return add_variable(name, type, ir_var_shader_out, slot); } + ir_variable *add_index_output(int slot, int index, const glsl_type *type, const char *name) + { + return add_index_variable(name, type, ir_var_shader_out, slot, index); + } + ir_variable *add_system_value(int slot, const glsl_type *type, const char *name) { @@ -384,6 +390,8 @@ private: ir_variable *add_variable(const char *name, const glsl_type *type, enum ir_variable_mode mode, int slot); + ir_variable *add_index_variable(const char *name, const glsl_type *type, + enum ir_variable_mode mode, int slot, int index); ir_variable *add_uniform(const glsl_type *type, const char *name); ir_variable *add_const(const char *name, int value); ir_variable *add_const_ivec3(const char *name, int x, int y, int z); @@ -429,6 +437,46 @@ builtin_variable_generator::builtin_variable_generator( { } +ir_variable * +builtin_variable_generator::add_index_variable(const char *name, + const glsl_type *type, + enum ir_variable_mode mode, int slot, int index) +{ + ir_variable *var = new(symtab) ir_variable(type, name, mode); + var->data.how_declared = ir_var_declared_implicitly; + + switch (var->data.mode) { + case ir_var_auto: + case ir_var_shader_in: + case ir_var_uniform: + case ir_var_system_value: + var->data.read_only = true; + break; + case ir_var_shader_out: + case ir_var_shader_storage: + break; + default: + /* The only variables that are added using this function should be + * uniforms, shader storage, shader inputs, shader outputs, constants + * (which use ir_var_auto), and system values. + */ + assert(0); + break; + } + + var->data.location = slot; + var->data.explicit_location = (slot >= 0); + var->data.explicit_index = 1; + var->data.index = index; + + /* Once the variable is created and initialized, add it to the symbol table + * and add the declaration to the IR stream.
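+ *
+ * Net effect, sketched for the dual-source built-in added later in
+ * this patch (the real call goes through the add_index_output()
+ * wrapper):
+ *
+ *    add_index_variable("gl_SecondaryFragColorEXT", vec4_t,
+ *                       ir_var_shader_out, FRAG_RESULT_DATA0, 1);
+ *
+ * yields var->data.location == FRAG_RESULT_DATA0 and
+ * var->data.index == 1, i.e. color attachment 0, blend source one.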
+ */ + instructions->push_tail(var); + + symtab->add_variable(var); + return var; +} ir_variable * builtin_variable_generator::add_variable(const char *name, @@ -580,6 +628,14 @@ builtin_variable_generator::generate_constants() add_const("gl_MaxVaryingVectors", state->ctx->Const.MaxVarying); } + + /* EXT_blend_func_extended brings a built-in constant + * for determining the number of dual-source draw buffers + */ + if (state->EXT_blend_func_extended_enable) { + add_const("gl_MaxDualSourceDrawBuffersEXT", + state->Const.MaxDualSourceDrawBuffers); + } } else { add_const("gl_MaxVertexUniformComponents", state->Const.MaxVertexUniformComponents); @@ -1016,6 +1072,19 @@ builtin_variable_generator::generate_fs_special_vars() array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData"); } + if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) { + /* We make an assumption here that there will only ever be one dual-source draw buffer. + * In case this assumption is ever proven to be false, make sure to assert here + * since we don't handle this case. + * In practice, this issue will never arise since no hardware will support it. + */ + assert(state->Const.MaxDualSourceDrawBuffers <= 1); + add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, "gl_SecondaryFragColorEXT"); + add_index_output(FRAG_RESULT_DATA0, 1, + array(vec4_t, state->Const.MaxDualSourceDrawBuffers), + "gl_SecondaryFragDataEXT"); + } + /* gl_FragDepth has always been in desktop GLSL, but did not appear in GLSL * ES 1.00. */ @@ -1186,6 +1255,7 @@ builtin_variable_generator::generate_varyings() var->data.centroid = fields[i].centroid; var->data.sample = fields[i].sample; var->data.patch = fields[i].patch; + var->data.precision = fields[i].precision; var->init_interface_type(per_vertex_out_type); } } diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y index 6aa7abec00e..2fd4cf04079 100644 --- a/src/glsl/glcpp/glcpp-parse.y +++ b/src/glsl/glcpp/glcpp-parse.y @@ -2384,6 +2384,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "GL_OES_standard_derivatives", 1); if (extensions->ARB_texture_multisample) add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1); + if (extensions->ARB_blend_func_extended) + add_builtin_define(parser, "GL_EXT_blend_func_extended", 1); } } else { add_builtin_define(parser, "GL_ARB_draw_buffers", 1); @@ -2510,6 +2512,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions != NULL) { if (extensions->EXT_shader_integer_mix) add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1); + + if (extensions->EXT_shader_samples_identical) + add_builtin_define(parser, "GL_EXT_shader_samples_identical", 1); } if (version >= 150) diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index adf6a05acce..5a8f98019d1 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -298,7 +298,6 @@ static bool match_layout_qualifier(const char *s1, const char *s2, %type <node> conditionopt %type <node> for_init_statement %type <for_rest_statement> for_rest_statement -%type <n> integer_constant %type <node> layout_defaults %right THEN ELSE @@ -1152,11 +1151,6 @@ layout_qualifier_id_list: } ; -integer_constant: - INTCONSTANT { $$ = $1; } - | UINTCONSTANT { $$ = $1; } - ; - layout_qualifier_id: any_identifier { @@ -1453,9 +1447,18 @@ layout_qualifier_id: YYERROR; } } - | any_identifier '=' integer_constant + | any_identifier '='
constant_expression { memset(& $$, 0, sizeof($$)); + void *ctx = state; + + if ($3->oper != ast_int_constant && + $3->oper != ast_uint_constant && + !state->has_enhanced_layouts()) { + _mesa_glsl_error(& @1, state, + "compile-time constant expressions require " + "GLSL 4.40 or ARB_enhanced_layouts"); + } if (match_layout_qualifier("location", $1, state) == 0) { $$.flags.q.explicit_location = 1; @@ -1466,24 +1469,17 @@ layout_qualifier_id: "GL_ARB_explicit_attrib_location layout " "identifier `%s' used", $1); } - - if ($3 >= 0) { - $$.location = $3; - } else { - _mesa_glsl_error(& @3, state, "invalid location %d specified", $3); - YYERROR; - } + $$.location = $3; } if (match_layout_qualifier("index", $1, state) == 0) { - $$.flags.q.explicit_index = 1; - - if ($3 >= 0) { - $$.index = $3; - } else { - _mesa_glsl_error(& @3, state, "invalid index %d specified", $3); + if (state->es_shader && !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(& @3, state, "index layout qualifier requires EXT_blend_func_extended"); YYERROR; } + + $$.flags.q.explicit_index = 1; + $$.index = $3; } if ((state->has_420pack() || @@ -1502,18 +1498,11 @@ layout_qualifier_id: if (match_layout_qualifier("max_vertices", $1, state) == 0) { $$.flags.q.max_vertices = 1; - - if ($3 < 0) { + $$.max_vertices = new(ctx) ast_layout_expression(@1, $3); + if (!state->is_version(150, 0)) { _mesa_glsl_error(& @3, state, - "invalid max_vertices %d specified", $3); - YYERROR; - } else { - $$.max_vertices = $3; - if (!state->is_version(150, 0)) { - _mesa_glsl_error(& @3, state, - "#version 150 max_vertices qualifier " - "specified", $3); - } + "#version 150 max_vertices qualifier " + "specified", $3); } } @@ -1521,15 +1510,8 @@ layout_qualifier_id: if (match_layout_qualifier("stream", $1, state) == 0 && state->check_explicit_attrib_stream_allowed(& @3)) { $$.flags.q.stream = 1; - - if ($3 < 0) { - _mesa_glsl_error(& @3, state, - "invalid stream %d specified", $3); - YYERROR; - } else { - $$.flags.q.explicit_stream = 1; - $$.stream = $3; - } + $$.flags.q.explicit_stream = 1; + $$.stream = $3; } } @@ -1541,12 +1523,7 @@ layout_qualifier_id: for (int i = 0; i < 3; i++) { if (match_layout_qualifier(local_size_qualifiers[i], $1, state) == 0) { - if ($3 <= 0) { - _mesa_glsl_error(& @3, state, - "invalid %s of %d specified", - local_size_qualifiers[i], $3); - YYERROR; - } else if (!state->has_compute_shader()) { + if (!state->has_compute_shader()) { _mesa_glsl_error(& @3, state, "%s qualifier requires GLSL 4.30 or " "GLSL ES 3.10 or ARB_compute_shader", @@ -1554,7 +1531,7 @@ layout_qualifier_id: YYERROR; } else { $$.flags.q.local_size |= (1 << i); - $$.local_size[i] = $3; + $$.local_size[i] = new(ctx) ast_layout_expression(@1, $3); } break; } @@ -1562,48 +1539,24 @@ layout_qualifier_id: if (match_layout_qualifier("invocations", $1, state) == 0) { $$.flags.q.invocations = 1; - - if ($3 <= 0) { + $$.invocations = new(ctx) ast_layout_expression(@1, $3); + if (!state->is_version(400, 0) && + !state->ARB_gpu_shader5_enable) { _mesa_glsl_error(& @3, state, - "invalid invocations %d specified", $3); - YYERROR; - } else if ($3 > MAX_GEOMETRY_SHADER_INVOCATIONS) { - _mesa_glsl_error(& @3, state, - "invocations (%d) exceeds " - "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", $3); - YYERROR; - } else { - $$.invocations = $3; - if (!state->is_version(400, 0) && - !state->ARB_gpu_shader5_enable) { - _mesa_glsl_error(& @3, state, - "GL_ARB_gpu_shader5 invocations " - "qualifier specified", $3); - } + "GL_ARB_gpu_shader5 invocations " + "qualifier 
specified", $3); } } /* Layout qualifiers for tessellation control shaders. */ if (match_layout_qualifier("vertices", $1, state) == 0) { $$.flags.q.vertices = 1; - - if ($3 <= 0) { - _mesa_glsl_error(& @3, state, - "invalid vertices (%d) specified", $3); - YYERROR; - } else if ($3 > (int)state->Const.MaxPatchVertices) { - _mesa_glsl_error(& @3, state, - "vertices (%d) exceeds " - "GL_MAX_PATCH_VERTICES", $3); - YYERROR; - } else { - $$.vertices = $3; - if (!state->ARB_tessellation_shader_enable && - !state->is_version(400, 0)) { - _mesa_glsl_error(& @1, state, - "vertices qualifier requires GLSL 4.00 or " - "ARB_tessellation_shader"); - } + $$.vertices = new(ctx) ast_layout_expression(@1, $3); + if (!state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "vertices qualifier requires GLSL 4.00 or " + "ARB_tessellation_shader"); } } diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 02584c62a4d..b41b64af2c1 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -104,6 +104,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers; + this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers; + /* 1.50 constants */ this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents; @@ -646,9 +648,11 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(AMD_shader_trinary_minmax, true, false, dummy_true), EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer), EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index), + EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended), EXT(EXT_draw_buffers, false, true, dummy_true), EXT(EXT_separate_shader_objects, false, true, dummy_true), EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix), + EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical), EXT(EXT_texture_array, true, false, EXT_texture_array), }; @@ -1646,8 +1650,20 @@ set_shader_inout_layout(struct gl_shader *shader, switch (shader->Stage) { case MESA_SHADER_TESS_CTRL: shader->TessCtrl.VerticesOut = 0; - if (state->tcs_output_vertices_specified) - shader->TessCtrl.VerticesOut = state->out_qualifier->vertices; + if (state->tcs_output_vertices_specified) { + unsigned vertices; + if (state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &vertices, + false)) { + + YYLTYPE loc = state->out_qualifier->vertices->get_location(); + if (vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", vertices); + } + shader->TessCtrl.VerticesOut = vertices; + } + } break; case MESA_SHADER_TESS_EVAL: shader->TessEval.PrimitiveMode = PRIM_UNKNOWN; @@ -1668,8 +1684,14 @@ set_shader_inout_layout(struct gl_shader *shader, break; case MESA_SHADER_GEOMETRY: shader->Geom.VerticesOut = 0; - if (state->out_qualifier->flags.q.max_vertices) - shader->Geom.VerticesOut = state->out_qualifier->max_vertices; + if (state->out_qualifier->flags.q.max_vertices) { + unsigned qual_max_vertices; + if (state->out_qualifier->max_vertices-> + process_qualifier_constant(state, "max_vertices", + &qual_max_vertices, true)) { + shader->Geom.VerticesOut = qual_max_vertices; + } + } if 
(state->gs_input_prim_type_specified) { shader->Geom.InputType = state->in_qualifier->prim_type; @@ -1684,8 +1706,22 @@ set_shader_inout_layout(struct gl_shader *shader, } shader->Geom.Invocations = 0; - if (state->in_qualifier->flags.q.invocations) - shader->Geom.Invocations = state->in_qualifier->invocations; + if (state->in_qualifier->flags.q.invocations) { + unsigned invocations; + if (state->in_qualifier->invocations-> + process_qualifier_constant(state, "invocations", + &invocations, false)) { + + YYLTYPE loc = state->in_qualifier->invocations->get_location(); + if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) { + _mesa_glsl_error(&loc, state, + "invocations (%d) exceeds " + "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", + invocations); + } + shader->Geom.Invocations = invocations; + } + } break; case MESA_SHADER_COMPUTE: @@ -1797,6 +1833,9 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, if (shader->InfoLog) ralloc_free(shader->InfoLog); + if (!state->error) + set_shader_inout_layout(shader, state); + shader->symbols = new(shader->ir) glsl_symbol_table; shader->CompileStatus = !state->error; shader->InfoLog = state->info_log; @@ -1804,9 +1843,6 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, shader->IsES = state->es_shader; shader->uses_builtin_functions = state->uses_builtin_functions; - if (!state->error) - set_shader_inout_layout(shader, state); - /* Retain any live IR, but trash the rest. */ reparent_ir(shader->ir, shader->ir); diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 1d8c1b8799f..17ff0b5af79 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -380,6 +380,9 @@ struct _mesa_glsl_parse_state { /* ARB_draw_buffers */ unsigned MaxDrawBuffers; + /* ARB_blend_func_extended */ + unsigned MaxDualSourceDrawBuffers; + /* 3.00 ES */ int MinProgramTexelOffset; int MaxProgramTexelOffset; @@ -595,12 +598,16 @@ struct _mesa_glsl_parse_state { bool AMD_vertex_shader_layer_warn; bool AMD_vertex_shader_viewport_index_enable; bool AMD_vertex_shader_viewport_index_warn; + bool EXT_blend_func_extended_enable; + bool EXT_blend_func_extended_warn; bool EXT_draw_buffers_enable; bool EXT_draw_buffers_warn; bool EXT_separate_shader_objects_enable; bool EXT_separate_shader_objects_warn; bool EXT_shader_integer_mix_enable; bool EXT_shader_integer_mix_warn; + bool EXT_shader_samples_identical_enable; + bool EXT_shader_samples_identical_warn; bool EXT_texture_array_enable; bool EXT_texture_array_warn; /*@}*/ diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 8933b230177..ca520f547a1 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -1421,12 +1421,11 @@ ir_dereference::is_lvalue() const } -static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples" }; +static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" }; const char *ir_texture::opcode_string() { - assert((unsigned int) op <= - sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0])); + assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs)); return tex_opcode_strs[op]; } @@ -1456,6 +1455,10 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type) } else if (this->op == ir_lod) { assert(type->vector_elements == 2); assert(type->base_type == GLSL_TYPE_FLOAT); + } else if (this->op == ir_samples_identical) { + 
assert(type == glsl_type::bool_type); + assert(sampler->type->base_type == GLSL_TYPE_SAMPLER); + assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS); } else { assert(sampler->type->sampler_type == (int) type->base_type); if (sampler->type->sampler_shadow) @@ -1676,6 +1679,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name, this->data.interpolation = INTERP_QUALIFIER_NONE; this->data.max_array_access = 0; this->data.atomic.offset = 0; + this->data.precision = GLSL_PRECISION_NONE; this->data.image_read_only = false; this->data.image_write_only = false; this->data.image_coherent = false; @@ -1842,6 +1846,7 @@ ir_function_signature::replace_parameters(exec_list *new_params) ir_function::ir_function(const char *name) : ir_instruction(ir_type_function) { + this->subroutine_index = -1; this->name = ralloc_strdup(this, name); } diff --git a/src/glsl/ir.h b/src/glsl/ir.h index d59dee1e369..e1109eec1d3 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1171,6 +1171,8 @@ public: */ int num_subroutine_types; const struct glsl_type **subroutine_types; + + int subroutine_index; }; inline const char *ir_function_signature::function_name() const @@ -1965,6 +1967,7 @@ enum ir_texture_opcode { ir_tg4, /**< Texture gather */ ir_query_levels, /**< Texture levels query */ ir_texture_samples, /**< Texture samples query */ + ir_samples_identical, /**< Query whether all samples are definitely identical. */ }; @@ -1991,6 +1994,7 @@ enum ir_texture_opcode { * (lod <type> <sampler> <coordinate>) * (tg4 <type> <sampler> <coordinate> <offset> <component>) * (query_levels <type> <sampler>) + * (samples_identical <sampler> <coordinate>) */ class ir_texture : public ir_rvalue { public: diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp index d6b06eeec87..2aef4fcb4ac 100644 --- a/src/glsl/ir_clone.cpp +++ b/src/glsl/ir_clone.cpp @@ -223,6 +223,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht); @@ -269,6 +270,7 @@ ir_function::clone(void *mem_ctx, struct hash_table *ht) const ir_function *copy = new(mem_ctx) ir_function(this->name); copy->is_subroutine = this->is_subroutine; + copy->subroutine_index = this->subroutine_index; copy->num_subroutine_types = this->num_subroutine_types; copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types); for (int i = 0; i < copy->num_subroutine_types; i++) diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp index 5f0785e0ece..b86f4ea16bb 100644 --- a/src/glsl/ir_equals.cpp +++ b/src/glsl/ir_equals.cpp @@ -58,8 +58,13 @@ ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const return false; for (unsigned i = 0; i < type->components(); i++) { - if (value.u[i] != other->value.u[i]) - return false; + if (type->base_type == GLSL_TYPE_DOUBLE) { + if (value.d[i] != other->value.d[i]) + return false; + } else { + if (value.u[i] != other->value.u[i]) + return false; + } } return true; @@ -152,6 +157,7 @@ ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: if (!lod_info.bias->equals(other->lod_info.bias, ignore)) diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp index 6495cc4581d..213992af28c 100644 --- a/src/glsl/ir_hv_accept.cpp +++ 
b/src/glsl/ir_hv_accept.cpp @@ -195,6 +195,7 @@ ir_texture::accept(ir_hierarchical_visitor *v) case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: s = this->lod_info.bias->accept(v); diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp index 42b03fdea52..fd7bc2eea98 100644 --- a/src/glsl/ir_print_visitor.cpp +++ b/src/glsl/ir_print_visitor.cpp @@ -268,6 +268,14 @@ void ir_print_visitor::visit(ir_texture *ir) { fprintf(f, "(%s ", ir->opcode_string()); + if (ir->op == ir_samples_identical) { + ir->sampler->accept(this); + fprintf(f, " "); + ir->coordinate->accept(this); + fprintf(f, ")"); + return; + } + print_type(f, ir->type); fprintf(f, " "); @@ -334,6 +342,8 @@ void ir_print_visitor::visit(ir_texture *ir) case ir_tg4: ir->lod_info.component->accept(this); break; + case ir_samples_identical: + unreachable("ir_samples_identical was already handled"); }; fprintf(f, ")"); } diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp index a6966f546bc..6486838b8b8 100644 --- a/src/glsl/ir_rvalue_visitor.cpp +++ b/src/glsl/ir_rvalue_visitor.cpp @@ -59,6 +59,7 @@ ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir) case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: handle_rvalue(&ir->lod_info.bias); diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index 7e77a675db1..c0b4b3e820c 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -766,7 +766,7 @@ public: gl_shader_stage consumer_stage); ~varying_matches(); void record(ir_variable *producer_var, ir_variable *consumer_var); - unsigned assign_locations(); + unsigned assign_locations(uint64_t reserved_slots); void store_locations() const; private: @@ -986,7 +986,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) * passed to varying_matches::record(). */ unsigned -varying_matches::assign_locations() +varying_matches::assign_locations(uint64_t reserved_slots) { /* Sort varying matches into an order that makes them easy to pack. */ qsort(this->matches, this->num_matches, sizeof(*this->matches), @@ -1013,6 +1013,10 @@ varying_matches::assign_locations() != this->matches[i].packing_class) { *location = ALIGN(*location, 4); } + while ((*location < MAX_VARYING * 4u) && + (reserved_slots & (1ull << *location / 4u))) { + *location = ALIGN(*location + 1, 4); + } this->matches[i].generic_location = *location; @@ -1376,6 +1380,38 @@ canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode) } /** + * Generate a bitfield map of the explicit locations for shader varyings. + * + * In theory a 32-bit value would be enough, but a 64-bit value is future-proof.
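+ *
+ * For example (illustrative): a producer that declares
+ *
+ *    layout(location = 3) out vec4 a;
+ *
+ * makes this function return 1ull << 3, and assign_locations() then
+ * steps over components 12..15 when packing the generic varyings.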
+ */ +uint64_t +reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode) +{ + assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); + assert(MAX_VARYING <= 64); /* avoid an overflow of the returned value */ + + uint64_t slots = 0; + int var_slot; + + if (!stage) + return slots; + + foreach_in_list(ir_instruction, node, stage->ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != io_mode || !var->data.explicit_location) + continue; + + var_slot = var->data.location - VARYING_SLOT_VAR0; + if (var_slot >= 0 && var_slot < MAX_VARYING) + slots |= 1ull << var_slot; + } + + return slots; +} + + /** * Assign locations for all variables that are produced in one pipeline stage * (the "producer") and consumed in the next stage (the "consumer"). * @@ -1550,7 +1586,11 @@ assign_varying_locations(struct gl_context *ctx, matches.record(matched_candidate->toplevel_var, NULL); } - const unsigned slots_used = matches.assign_locations(); + const uint64_t reserved_slots = + reserved_varying_slot(producer, ir_var_shader_out) | + reserved_varying_slot(consumer, ir_var_shader_in); + + const unsigned slots_used = matches.assign_locations(reserved_slots); matches.store_locations(); for (unsigned i = 0; i < num_tfeedback_decls; ++i) { diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index db00f8febc6..331d9a28007 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -3864,10 +3864,43 @@ link_assign_subroutine_types(struct gl_shader_program *prog) sh->SubroutineFunctions[sh->NumSubroutineFunctions].types = ralloc_array(sh, const struct glsl_type *, fn->num_subroutine_types); + + /* From Section 4.4.4 (Subroutine Function Layout Qualifiers) of the + * GLSL 4.5 spec: + * + * "Each subroutine with an index qualifier in the shader must be + * given a unique index, otherwise a compile or link error will be + * generated."
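+ *
+ * Illustrative GLSL (fn_t standing in for some subroutine type):
+ * these two definitions must be rejected,
+ *
+ *    layout(index = 2) subroutine(fn_t) vec4 impl_a(void) { ... }
+ *    layout(index = 2) subroutine(fn_t) vec4 impl_b(void) { ... }
+ *
+ * while definitions without an index qualifier later receive the
+ * lowest indices not explicitly claimed (see the loop below).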
+ */ + for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { + if (sh->SubroutineFunctions[j].index != -1 && + sh->SubroutineFunctions[j].index == fn->subroutine_index) { + linker_error(prog, "each subroutine index qualifier in the " + "shader must be unique\n"); + return; + } + } + sh->SubroutineFunctions[sh->NumSubroutineFunctions].index = + fn->subroutine_index; + for (int j = 0; j < fn->num_subroutine_types; j++) sh->SubroutineFunctions[sh->NumSubroutineFunctions].types[j] = fn->subroutine_types[j]; sh->NumSubroutineFunctions++; } + + /* Assign index for subroutines without an explicit index*/ + int index = 0; + for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { + while (sh->SubroutineFunctions[j].index == -1) { + for (unsigned k = 0; k < sh->NumSubroutineFunctions; k++) { + if (sh->SubroutineFunctions[k].index == index) + break; + else if (k == sh->NumSubroutineFunctions - 1) + sh->SubroutineFunctions[j].index = index; + } + index++; + } + } } } diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index d8df3544f10..a26300d1d26 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -31,6 +31,7 @@ #include "ir_visitor.h" #include "ir_hierarchical_visitor.h" #include "ir.h" +#include "main/imports.h" /* * pass to lower GLSL IR to NIR @@ -147,16 +148,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, nir_lower_outputs_to_temporaries(shader); - /* TODO: Use _mesa_fls instead */ - unsigned num_textures = 0; - for (unsigned i = 0; i < 8 * sizeof(sh->Program->SamplersUsed); i++) - if (sh->Program->SamplersUsed & (1 << i)) - num_textures = i; - shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); if (shader_prog->Label) shader->info.label = ralloc_strdup(shader, shader_prog->Label); - shader->info.num_textures = num_textures; + shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed); shader->info.num_ubos = sh->NumUniformBlocks; shader->info.num_abos = shader_prog->NumAtomicBuffers; shader->info.num_ssbos = sh->NumShaderStorageBlocks; @@ -174,6 +169,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, shader_prog->TransformFeedback.NumVarying > 0; switch (stage) { + case MESA_SHADER_TESS_CTRL: + shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut; + break; + case MESA_SHADER_GEOMETRY: shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn; shader->info.gs.output_primitive = sh->Geom.OutputType; @@ -244,6 +243,8 @@ constant_copy(ir_constant *ir, void *mem_ctx) unsigned total_elems = ir->type->components(); unsigned i; + + ret->num_elements = 0; switch (ir->type->base_type) { case GLSL_TYPE_UINT: for (i = 0; i < total_elems; i++) @@ -268,6 +269,8 @@ constant_copy(ir_constant *ir, void *mem_ctx) case GLSL_TYPE_STRUCT: ret->elements = ralloc_array(mem_ctx, nir_constant *, ir->type->length); + ret->num_elements = ir->type->length; + i = 0; foreach_in_list(ir_constant, field, &ir->components) { ret->elements[i] = constant_copy(field, mem_ctx); @@ -278,6 +281,7 @@ constant_copy(ir_constant *ir, void *mem_ctx) case GLSL_TYPE_ARRAY: ret->elements = ralloc_array(mem_ctx, nir_constant *, ir->type->length); + ret->num_elements = ir->type->length; for (i = 0; i < ir->type->length; i++) ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx); @@ -297,15 +301,6 @@ nir_visitor::visit(ir_variable *ir) var->type = ir->type; var->name = ralloc_strdup(var, ir->name); - if (ir->is_interface_instance() && ir->get_max_ifc_array_access() != NULL) { - unsigned size = 
ir->get_interface_type()->length; - var->max_ifc_array_access = ralloc_array(var, unsigned, size); - memcpy(var->max_ifc_array_access, ir->get_max_ifc_array_access(), - size * sizeof(unsigned)); - } else { - var->max_ifc_array_access = NULL; - } - var->data.read_only = ir->data.read_only; var->data.centroid = ir->data.centroid; var->data.sample = ir->data.sample; @@ -1543,9 +1538,9 @@ nir_visitor::visit(ir_expression *ir) result = supports_ints ? nir_ior(&b, srcs[0], srcs[1]) : nir_for(&b, srcs[0], srcs[1]); break; - case ir_binop_logic_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break; - result = supports_ints ? nir_ior(&b, srcs[0], srcs[1]) - : nir_for(&b, srcs[0], srcs[1]); + case ir_binop_logic_xor: + result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1]) + : nir_fxor(&b, srcs[0], srcs[1]); break; case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break; case ir_binop_rshift: @@ -1808,6 +1803,11 @@ nir_visitor::visit(ir_texture *ir) num_srcs = 0; break; + case ir_samples_identical: + op = nir_texop_samples_identical; + num_srcs = 1; /* coordinate */ + break; + default: unreachable("not reached"); } @@ -1835,8 +1835,9 @@ nir_visitor::visit(ir_texture *ir) case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_BOOL: case GLSL_TYPE_UINT: - instr->dest_type = nir_type_unsigned; + instr->dest_type = nir_type_uint; break; default: unreachable("not reached"); diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp index 3e9d38f7707..64b5c0cb106 100644 --- a/src/glsl/nir/glsl_types.cpp +++ b/src/glsl/nir/glsl_types.cpp @@ -130,6 +130,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, this->fields.structure[i].image_coherent = fields[i].image_coherent; this->fields.structure[i].image_volatile = fields[i].image_volatile; this->fields.structure[i].image_restrict = fields[i].image_restrict; + this->fields.structure[i].precision = fields[i].precision; } mtx_unlock(&glsl_type::mutex); diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h index 14c2aa49f85..1aafa5cd547 100644 --- a/src/glsl/nir/glsl_types.h +++ b/src/glsl/nir/glsl_types.h @@ -858,7 +858,7 @@ struct glsl_struct_field { /** * Precision qualifier */ - unsigned precision; + unsigned precision:2; /** * Image qualifiers, applicable to buffer variables defined in shader @@ -873,7 +873,8 @@ struct glsl_struct_field { #ifdef __cplusplus glsl_struct_field(const struct glsl_type *_type, const char *_name) : type(_type), name(_name), location(-1), interpolation(0), centroid(0), - sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0) + sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), + precision(GLSL_PRECISION_NONE) { /* empty */ } diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 3157ff82d99..79df6d3df94 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -107,6 +107,10 @@ void nir_shader_add_variable(nir_shader *shader, nir_variable *var) { switch (var->data.mode) { + case nir_var_all: + assert(!"invalid mode"); + break; + case nir_var_local: assert(!"nir_shader_add_variable cannot be used for local variables"); break; @@ -312,6 +316,14 @@ nir_block_create(nir_shader *shader) block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); block->imm_dom = NULL; + /* XXX maybe it would be worth it to defer allocation? This + * way it doesn't get allocated for shader ref's that never run + * nir_calc_dominance? 
For example, the state-tracker creates an + * initial IR, clones that, runs the appropriate lowering pass, passes + * it to the driver which does common lowering/opt, and then stores a ref + * which is later used to do state-specific lowering and further + * opt. Do any of the references not need dominance metadata? + */ block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -1306,21 +1318,62 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src) { assert(!new_src.is_ssa || def != new_src.ssa); - nir_foreach_use_safe(def, use_src) { - nir_instr *src_parent_instr = use_src->parent_instr; - list_del(&use_src->use_link); - nir_src_copy(use_src, &new_src, src_parent_instr); - src_add_all_uses(use_src, src_parent_instr, NULL); - } + nir_foreach_use_safe(def, use_src) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + + nir_foreach_if_use_safe(def, use_src) + nir_if_rewrite_condition(use_src->parent_if, new_src); +} + +static bool +is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between) +{ + assert(start->block == end->block); + + if (between->block != start->block) + return false; + + /* Search backwards looking for "between" */ + while (start != end) { + if (between == end) + return true; - nir_foreach_if_use_safe(def, use_src) { - nir_if *src_parent_if = use_src->parent_if; - list_del(&use_src->use_link); - nir_src_copy(use_src, &new_src, src_parent_if); - src_add_all_uses(use_src, NULL, src_parent_if); + end = nir_instr_prev(end); + assert(end); } + + return false; } +/* Replaces all uses of the given SSA def with the given source but only if + * the use comes after the after_me instruction. This can be useful if you + * are emitting code to fix up the result of some instruction: you can freely + * use the result in that code and then call rewrite_uses_after and pass the + * last fixup instruction as after_me and it will replace all of the uses you + * want without touching the fixup code. + * + * This function assumes that after_me is in the same block as + * def->parent_instr and that after_me comes after def->parent_instr. + */ +void +nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, + nir_instr *after_me) +{ + assert(!new_src.is_ssa || def != new_src.ssa); + + nir_foreach_use_safe(def, use_src) { + assert(use_src->parent_instr != def->parent_instr); + /* Since def already dominates all of its uses, the only way a use can + * not be dominated by after_me is if it is between def and after_me in + * the instruction list.
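+ *
+ * Typical fixup pattern (a sketch; b is a nir_builder positioned
+ * right after def->parent_instr, and bias is any previously defined
+ * value):
+ *
+ *    nir_ssa_def *fixed = nir_fadd(&b, def, bias);
+ *    nir_ssa_def_rewrite_uses_after(def, nir_src_for_ssa(fixed),
+ *                                   fixed->parent_instr);
+ *
+ * The nir_fadd keeps reading the original def while every older use
+ * is redirected to the fixed-up value.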
+ */ + if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr)) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + } + + nir_foreach_if_use_safe(def, use_src) + nir_if_rewrite_condition(use_src->parent_if, new_src); +} static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, bool reverse, void *state); @@ -1571,6 +1624,8 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_tess_level_inner; case SYSTEM_VALUE_VERTICES_IN: return nir_intrinsic_load_patch_vertices_in; + case SYSTEM_VALUE_HELPER_INVOCATION: + return nir_intrinsic_load_helper_invocation; default: unreachable("system value does not directly correspond to intrinsic"); } @@ -1614,6 +1669,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_TESS_LEVEL_INNER; case nir_intrinsic_load_patch_vertices_in: return SYSTEM_VALUE_VERTICES_IN; + case nir_intrinsic_load_helper_invocation: + return SYSTEM_VALUE_HELPER_INVOCATION; default: unreachable("intrinsic doesn't produce a system value"); } diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index df0e6f1f54a..b7374e17407 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -82,6 +82,7 @@ typedef struct { } nir_state_slot; typedef enum { + nir_var_all = -1, nir_var_shader_in, nir_var_shader_out, nir_var_global, @@ -111,6 +112,11 @@ typedef struct nir_constant { */ union nir_constant_data value; + /* we could get this from the var->type but makes clone *much* easier to + * not have to care about the type. + */ + unsigned num_elements; + /* Array elements / Structure Fields */ struct nir_constant **elements; } nir_constant; @@ -147,19 +153,6 @@ typedef struct { */ char *name; - /** - * For variables which satisfy the is_interface_instance() predicate, this - * points to an array of integers such that if the ith member of the - * interface block is an array, max_ifc_array_access[i] is the maximum - * array element of that member that has been accessed. If the ith member - * of the interface block is not an array, max_ifc_array_access[i] is - * unused. - * - * For variables whose type is not an interface block, this pointer is - * NULL. - */ - unsigned *max_ifc_array_access; - struct nir_variable_data { /** @@ -654,7 +647,7 @@ typedef enum { nir_type_invalid = 0, /* Not a valid type */ nir_type_float, nir_type_int, - nir_type_unsigned, + nir_type_uint, nir_type_bool } nir_alu_type; @@ -977,6 +970,9 @@ typedef enum { nir_texop_tg4, /**< Texture gather */ nir_texop_query_levels, /**< Texture levels query */ nir_texop_texture_samples, /**< Texture samples query */ + nir_texop_samples_identical, /**< Query whether all samples are definitely + * identical. + */ } nir_texop; typedef struct { @@ -1069,6 +1065,7 @@ nir_tex_instr_dest_size(nir_tex_instr *instr) case nir_texop_texture_samples: case nir_texop_query_levels: + case nir_texop_samples_identical: return 1; default: @@ -1079,6 +1076,31 @@ nir_tex_instr_dest_size(nir_tex_instr *instr) } } +/* Returns true if this texture operation queries something about the texture + * rather than actually sampling it. 
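+ *
+ * A lowering pass can use this to skip operations that never read
+ * texel data, e.g. (sketch):
+ *
+ *    if (nir_tex_instr_is_query(tex))
+ *       continue;   /* nothing to swizzle or saturate */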
+ */ +static inline bool +nir_tex_instr_is_query(nir_tex_instr *instr) +{ + switch (instr->op) { + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_texture_samples: + case nir_texop_query_levels: + return true; + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_tg4: + return false; + default: + unreachable("Invalid texture opcode"); + } +} + static inline unsigned nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src) { @@ -1353,6 +1375,7 @@ typedef enum { nir_metadata_block_index = 0x1, nir_metadata_dominance = 0x2, nir_metadata_live_ssa_defs = 0x4, + nir_metadata_not_properly_reset = 0x8, } nir_metadata; typedef struct { @@ -1578,6 +1601,11 @@ typedef struct nir_shader_info { struct { unsigned local_size[3]; } cs; + + struct { + /** The number of vertices in the TCS output patch. */ + unsigned vertices_out; + } tcs; }; } nir_shader_info; @@ -1910,6 +1938,8 @@ void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, unsigned num_components, const char *name); void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); +void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, + nir_instr *after_me); /* visits basic blocks in source-code order */ typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state); @@ -1937,10 +1967,16 @@ void nir_index_blocks(nir_function_impl *impl); void nir_print_shader(nir_shader *shader, FILE *fp); void nir_print_instr(const nir_instr *instr, FILE *fp); +nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s); + #ifdef DEBUG void nir_validate_shader(nir_shader *shader); +void nir_metadata_set_validation_flag(nir_shader *shader); +void nir_metadata_check_validation_flag(nir_shader *shader); #else static inline void nir_validate_shader(nir_shader *shader) { (void) shader; } +static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } +static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } #endif /* DEBUG */ void nir_calc_dominance_impl(nir_function_impl *impl); @@ -2032,9 +2068,22 @@ typedef struct nir_lower_tex_options { unsigned saturate_s; unsigned saturate_t; unsigned saturate_r; + + /* Bitmask of samplers that need swizzling. + * + * If (swizzle_result & (1 << sampler_index)), then the swizzle in + * swizzles[sampler_index] is applied to the result of the texturing + * operation. + */ + unsigned swizzle_result; + + /* A swizzle for each sampler. Values 0-3 represent x, y, z, or w swizzles + * while 4 and 5 represent 0 and 1 respectively. + */ + uint8_t swizzles[32][4]; } nir_lower_tex_options; -void nir_lower_tex(nir_shader *shader, +bool nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options); void nir_lower_idiv(nir_shader *shader); diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 205aa067b0b..fe41c74b608 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -256,7 +256,7 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], { nir_alu_src alu_src = { NIR_SRC_INIT }; alu_src.src = nir_src_for_ssa(src); - for (int i = 0; i < 4; i++) + for (unsigned i = 0; i < num_components; i++) alu_src.swizzle[i] = swiz[i]; return use_fmov ? 
nir_fmov_alu(build, alu_src, num_components) : @@ -290,6 +290,8 @@ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c) /** * Turns a nir_src into a nir_ssa_def * so it can be passed to * nir_build_alu()-based builder calls. + * + * See nir_ssa_for_alu_src() for alu instructions. */ static inline nir_ssa_def * nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) @@ -305,6 +307,25 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) return nir_imov_alu(build, alu, num_components); } +/** + * Similar to nir_ssa_for_src(), but for alu src's, respecting the + * nir_alu_src's swizzle. + */ +static inline nir_ssa_def * +nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn) +{ + static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 }; + nir_alu_src *src = &instr->src[srcn]; + unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn); + + if (src->src.is_ssa && (src->src.ssa->num_components == num_components) && + !src->abs && !src->negate && + (memcmp(src->swizzle, trivial_swizzle, num_components) == 0)) + return src->src.ssa; + + return nir_imov_alu(build, *src, num_components); +} + static inline nir_ssa_def * nir_load_var(nir_builder *build, nir_variable *var) { diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c new file mode 100644 index 00000000000..68b72ef5381 --- /dev/null +++ b/src/glsl/nir/nir_clone.c @@ -0,0 +1,674 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_control_flow_private.h" + +/* Secret Decoder Ring: + * clone_foo(): + * Allocate and clone a foo. + * __clone_foo(): + * Clone body of foo (ie. parent class, embedded struct, etc) + */ + +typedef struct { + /* maps orig ptr -> cloned ptr: */ + struct hash_table *ptr_table; + + /* List of phi sources. 
*/ + struct list_head phi_srcs; + + /* new shader object, used as memctx for just about everything else: */ + nir_shader *ns; +} clone_state; + +static void +init_clone_state(clone_state *state) +{ + state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + list_inithead(&state->phi_srcs); +} + +static void +free_clone_state(clone_state *state) +{ + _mesa_hash_table_destroy(state->ptr_table, NULL); +} + +static void * +lookup_ptr(clone_state *state, const void *ptr) +{ + struct hash_entry *entry; + + if (!ptr) + return NULL; + + entry = _mesa_hash_table_search(state->ptr_table, ptr); + assert(entry && "Failed to find pointer!"); + if (!entry) + return NULL; + + return entry->data; +} + +static void +store_ptr(clone_state *state, void *nptr, const void *ptr) +{ + _mesa_hash_table_insert(state->ptr_table, ptr, nptr); +} + +static nir_constant * +clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) +{ + nir_constant *nc = ralloc(nvar, nir_constant); + + nc->value = c->value; + nc->num_elements = c->num_elements; + nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements); + for (unsigned i = 0; i < c->num_elements; i++) { + nc->elements[i] = clone_constant(state, c->elements[i], nvar); + } + + return nc; +} + +/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid + * having to deal with locals and globals separately: + */ +static nir_variable * +clone_variable(clone_state *state, const nir_variable *var) +{ + nir_variable *nvar = rzalloc(state->ns, nir_variable); + store_ptr(state, nvar, var); + + nvar->type = var->type; + nvar->name = ralloc_strdup(nvar, var->name); + nvar->data = var->data; + nvar->num_state_slots = var->num_state_slots; + nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); + memcpy(nvar->state_slots, var->state_slots, + var->num_state_slots * sizeof(nir_state_slot)); + if (var->constant_initializer) { + nvar->constant_initializer = + clone_constant(state, var->constant_initializer, nvar); + } + nvar->interface_type = var->interface_type; + + return nvar; +} + +/* clone list of nir_variable: */ +static void +clone_var_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + exec_list_make_empty(dst); + foreach_list_typed(nir_variable, var, node, list) { + nir_variable *nvar = clone_variable(state, var); + exec_list_push_tail(dst, &nvar->node); + } +} + +/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create() + * to avoid having to deal with locals and globals separately: + */ +static nir_register * +clone_register(clone_state *state, const nir_register *reg) +{ + nir_register *nreg = rzalloc(state->ns, nir_register); + store_ptr(state, nreg, reg); + + nreg->num_components = reg->num_components; + nreg->num_array_elems = reg->num_array_elems; + nreg->index = reg->index; + nreg->name = ralloc_strdup(nreg, reg->name); + nreg->is_global = reg->is_global; + nreg->is_packed = reg->is_packed; + + /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */ + list_inithead(&nreg->uses); + list_inithead(&nreg->defs); + list_inithead(&nreg->if_uses); + + return nreg; +} + +/* clone list of nir_register: */ +static void +clone_reg_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + exec_list_make_empty(dst); + foreach_list_typed(nir_register, reg, node, list) { + nir_register *nreg = clone_register(state, reg); + exec_list_push_tail(dst, &nreg->node); + } +} + +static void 
+__clone_src(clone_state *state, void *ninstr_or_if, + nir_src *nsrc, const nir_src *src) +{ + nsrc->is_ssa = src->is_ssa; + if (src->is_ssa) { + nsrc->ssa = lookup_ptr(state, src->ssa); + } else { + nsrc->reg.reg = lookup_ptr(state, src->reg.reg); + if (src->reg.indirect) { + nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); + __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); + } + nsrc->reg.base_offset = src->reg.base_offset; + } +} + +static void +__clone_dst(clone_state *state, nir_instr *ninstr, + nir_dest *ndst, const nir_dest *dst) +{ + ndst->is_ssa = dst->is_ssa; + if (dst->is_ssa) { + nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name); + store_ptr(state, &ndst->ssa, &dst->ssa); + } else { + ndst->reg.reg = lookup_ptr(state, dst->reg.reg); + if (dst->reg.indirect) { + ndst->reg.indirect = ralloc(ninstr, nir_src); + __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); + } + ndst->reg.base_offset = dst->reg.base_offset; + } +} + +static nir_deref *clone_deref(clone_state *state, const nir_deref *deref, + nir_instr *ninstr, nir_deref *parent); + +static nir_deref_var * +clone_deref_var(clone_state *state, const nir_deref_var *dvar, + nir_instr *ninstr) +{ + nir_variable *nvar = lookup_ptr(state, dvar->var); + nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar); + + if (dvar->deref.child) + ndvar->deref.child = clone_deref(state, dvar->deref.child, + ninstr, &ndvar->deref); + + return ndvar; +} + +static nir_deref_array * +clone_deref_array(clone_state *state, const nir_deref_array *darr, + nir_instr *ninstr, nir_deref *parent) +{ + nir_deref_array *ndarr = nir_deref_array_create(parent); + + ndarr->deref.type = darr->deref.type; + if (darr->deref.child) + ndarr->deref.child = clone_deref(state, darr->deref.child, + ninstr, &ndarr->deref); + + ndarr->deref_array_type = darr->deref_array_type; + ndarr->base_offset = darr->base_offset; + if (ndarr->deref_array_type == nir_deref_array_type_indirect) + __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect); + + return ndarr; +} + +static nir_deref_struct * +clone_deref_struct(clone_state *state, const nir_deref_struct *dstr, + nir_instr *ninstr, nir_deref *parent) +{ + nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index); + + ndstr->deref.type = dstr->deref.type; + if (dstr->deref.child) + ndstr->deref.child = clone_deref(state, dstr->deref.child, + ninstr, &ndstr->deref); + + return ndstr; +} + +static nir_deref * +clone_deref(clone_state *state, const nir_deref *dref, + nir_instr *ninstr, nir_deref *parent) +{ + switch (dref->deref_type) { + case nir_deref_type_array: + return &clone_deref_array(state, nir_deref_as_array(dref), + ninstr, parent)->deref; + case nir_deref_type_struct: + return &clone_deref_struct(state, nir_deref_as_struct(dref), + ninstr, parent)->deref; + default: + unreachable("bad deref type"); + return NULL; + } +} + +static nir_alu_instr * +clone_alu(clone_state *state, const nir_alu_instr *alu) +{ + nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); + + __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest); + nalu->dest.saturate = alu->dest.saturate; + nalu->dest.write_mask = alu->dest.write_mask; + + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src); + nalu->src[i].negate = alu->src[i].negate; + nalu->src[i].abs = alu->src[i].abs; + memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, + 
sizeof(nalu->src[i].swizzle)); + } + + return nalu; +} + +static nir_intrinsic_instr * +clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr) +{ + nir_intrinsic_instr *nitr = + nir_intrinsic_instr_create(state->ns, itr->intrinsic); + + unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables; + unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs; + + if (nir_intrinsic_infos[itr->intrinsic].has_dest) + __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest); + + nitr->num_components = itr->num_components; + memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index)); + + for (unsigned i = 0; i < num_variables; i++) { + nitr->variables[i] = clone_deref_var(state, itr->variables[i], + &nitr->instr); + } + + for (unsigned i = 0; i < num_srcs; i++) + __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]); + + return nitr; +} + +static nir_load_const_instr * +clone_load_const(clone_state *state, const nir_load_const_instr *lc) +{ + nir_load_const_instr *nlc = + nir_load_const_instr_create(state->ns, lc->def.num_components); + + memcpy(&nlc->value, &lc->value, sizeof(nlc->value)); + + store_ptr(state, &nlc->def, &lc->def); + + return nlc; +} + +static nir_ssa_undef_instr * +clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa) +{ + nir_ssa_undef_instr *nsa = + nir_ssa_undef_instr_create(state->ns, sa->def.num_components); + + store_ptr(state, &nsa->def, &sa->def); + + return nsa; +} + +static nir_tex_instr * +clone_tex(clone_state *state, const nir_tex_instr *tex) +{ + nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs); + + ntex->sampler_dim = tex->sampler_dim; + ntex->dest_type = tex->dest_type; + ntex->op = tex->op; + __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest); + for (unsigned i = 0; i < ntex->num_srcs; i++) { + ntex->src[i].src_type = tex->src[i].src_type; + __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src); + } + ntex->coord_components = tex->coord_components; + ntex->is_array = tex->is_array; + ntex->is_shadow = tex->is_shadow; + ntex->is_new_style_shadow = tex->is_new_style_shadow; + memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset)); + ntex->component = tex->component; + ntex->texture_index = tex->texture_index; + ntex->texture_array_size = tex->texture_array_size; + if (tex->texture) + ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr); + ntex->sampler_index = tex->sampler_index; + if (tex->sampler) + ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr); + + return ntex; +} + +static nir_phi_instr * +clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk) +{ + nir_phi_instr *nphi = nir_phi_instr_create(state->ns); + + __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest); + + /* Cloning a phi node is a bit different from other instructions. The + * sources of phi instructions are the only time where we can use an SSA + * def before it is defined. In order to handle this, we just copy over + * the sources from the old phi instruction directly and then fix them up + * in a second pass once all the instructions in the function have been + * properly cloned. + * + * In order to ensure that the copied sources (which are the same as the + * old phi instruction's sources for now) don't get inserted into the old + * shader's use-def lists, we have to add the phi instruction *before* we + * set up its sources.
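+ *
+ * For example, with a loop like this (an illustrative nir_print-style
+ * fragment, not taken from any real shader):
+ *
+ *    block b1:  vec1 ssa_2 = phi b0: ssa_1, b2: ssa_5
+ *    ...
+ *    block b2:  vec1 ssa_5 = fadd ssa_2, ssa_4
+ *
+ * ssa_5 is cloned only after the phi that consumes it, so the phi's b2
+ * source cannot be remapped until the whole function has been cloned.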
+ */ + nir_instr_insert_after_block(nblk, &nphi->instr); + + foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { + nir_phi_src *nsrc = ralloc(nphi, nir_phi_src); + + /* Just copy the old source for now. */ + memcpy(nsrc, src, sizeof(*src)); + + /* Since we're not letting nir_instr_insert handle use/def stuff for us, + * we have to set the parent_instr manually. It doesn't really matter + * when we do it, so we might as well do it here. + */ + nsrc->src.parent_instr = &nphi->instr; + + /* Stash it in the list of phi sources. We'll walk this list and fix up + * sources at the very end of clone_function_impl. + */ + list_add(&nsrc->src.use_link, &state->phi_srcs); + + exec_list_push_tail(&nphi->srcs, &nsrc->node); + } + + return nphi; +} + +static nir_jump_instr * +clone_jump(clone_state *state, const nir_jump_instr *jmp) +{ + nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type); + + return njmp; +} + +static nir_call_instr * +clone_call(clone_state *state, const nir_call_instr *call) +{ + nir_function_overload *ncallee = lookup_ptr(state, call->callee); + nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee); + + for (unsigned i = 0; i < ncall->num_params; i++) + ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr); + + ncall->return_deref = clone_deref_var(state, call->return_deref, + &ncall->instr); + + return ncall; +} + +static nir_instr * +clone_instr(clone_state *state, const nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + return &clone_alu(state, nir_instr_as_alu(instr))->instr; + case nir_instr_type_intrinsic: + return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr; + case nir_instr_type_load_const: + return &clone_load_const(state, nir_instr_as_load_const(instr))->instr; + case nir_instr_type_ssa_undef: + return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr; + case nir_instr_type_tex: + return &clone_tex(state, nir_instr_as_tex(instr))->instr; + case nir_instr_type_phi: + unreachable("Cannot clone phis with clone_instr"); + case nir_instr_type_jump: + return &clone_jump(state, nir_instr_as_jump(instr))->instr; + case nir_instr_type_call: + return &clone_call(state, nir_instr_as_call(instr))->instr; + case nir_instr_type_parallel_copy: + unreachable("Cannot clone parallel copies"); + default: + unreachable("bad instr type"); + return NULL; + } +} + +static nir_block * +clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk) +{ + /* Don't actually create a new block. Just use the one from the tail of + * the list. NIR guarantees that the tail of the list is a block and that + * no two blocks are side-by-side in the IR; it should be empty. + */ + nir_block *nblk = + exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); + assert(nblk->cf_node.type == nir_cf_node_block); + assert(exec_list_is_empty(&nblk->instr_list)); + + /* We need this for phi sources */ + store_ptr(state, nblk, blk); + + nir_foreach_instr(blk, instr) { + if (instr->type == nir_instr_type_phi) { + /* Phi instructions are a bit of a special case when cloning because + * we don't want inserting the instruction to automatically handle + * use/defs for us. Instead, we need to wait until all the + * blocks/instructions are in so that we can set their sources up.
+ */ + clone_phi(state, nir_instr_as_phi(instr), nblk); + } else { + nir_instr *ninstr = clone_instr(state, instr); + nir_instr_insert_after_block(nblk, ninstr); + } + } + + return nblk; +} + +static void +clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list); + +static nir_if * +clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i) +{ + nir_if *ni = nir_if_create(state->ns); + + __clone_src(state, ni, &ni->condition, &i->condition); + + nir_cf_node_insert_end(cf_list, &ni->cf_node); + + clone_cf_list(state, &ni->then_list, &i->then_list); + clone_cf_list(state, &ni->else_list, &i->else_list); + + return ni; +} + +static nir_loop * +clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop) +{ + nir_loop *nloop = nir_loop_create(state->ns); + + nir_cf_node_insert_end(cf_list, &nloop->cf_node); + + clone_cf_list(state, &nloop->body, &loop->body); + + return nloop; +} + +/* clone list of nir_cf_node: */ +static void +clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, cf, node, list) { + switch (cf->type) { + case nir_cf_node_block: + clone_block(state, dst, nir_cf_node_as_block(cf)); + break; + case nir_cf_node_if: + clone_if(state, dst, nir_cf_node_as_if(cf)); + break; + case nir_cf_node_loop: + clone_loop(state, dst, nir_cf_node_as_loop(cf)); + break; + default: + unreachable("bad cf type"); + } + } +} + +static nir_function_impl * +clone_function_impl(clone_state *state, const nir_function_impl *fi, + nir_function_overload *nfo) +{ + nir_function_impl *nfi = nir_function_impl_create(nfo); + + clone_var_list(state, &nfi->locals, &fi->locals); + clone_reg_list(state, &nfi->registers, &fi->registers); + nfi->reg_alloc = fi->reg_alloc; + + nfi->num_params = fi->num_params; + nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); + for (unsigned i = 0; i < fi->num_params; i++) { + nfi->params[i] = lookup_ptr(state, fi->params[i]); + } + nfi->return_var = lookup_ptr(state, fi->return_var); + + assert(list_empty(&state->phi_srcs)); + + clone_cf_list(state, &nfi->body, &fi->body); + + /* After we've cloned almost everything, we have to walk the list of phi + * sources and fix them up. Thanks to loops, the block and SSA value for a + * phi source may not be defined when we first encounter it. Instead, we + * add it to the phi_srcs list and we fix it up here. + */ + list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { + src->pred = lookup_ptr(state, src->pred); + assert(src->src.is_ssa); + src->src.ssa = lookup_ptr(state, src->src.ssa); + + /* Remove from this list and place in the uses of the SSA def */ + list_del(&src->src.use_link); + list_addtail(&src->src.use_link, &src->src.ssa->uses); + } + assert(list_empty(&state->phi_srcs)); + + /* All metadata is invalidated in the cloning process */ + nfi->valid_metadata = 0; + + return nfi; +} + +static nir_function_overload * +clone_function_overload(clone_state *state, const nir_function_overload *fo, + nir_function *nfxn) +{ + nir_function_overload *nfo = nir_function_overload_create(nfxn); + + /* Needed for call instructions */ + store_ptr(state, nfo, fo); + + nfo->num_params = fo->num_params; + nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params); + memcpy(nfo->params, fo->params, sizeof(nir_parameter) * fo->num_params); + + nfo->return_type = fo->return_type; + + /* At first glance, it looks like we should clone the function_impl here. 
+ * However, call instructions need to be able to reference at least the + * overload, and those will get processed as we clone the function_impls. + * We stop here and do function_impls as a second pass. + */ + + return nfo; +} + +static nir_function * +clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) +{ + assert(ns == state->ns); + nir_function *nfxn = nir_function_create(ns, fxn->name); + + foreach_list_typed(nir_function_overload, fo, node, &fxn->overload_list) + clone_function_overload(state, fo, nfxn); + + return nfxn; +} + +nir_shader * +nir_shader_clone(void *mem_ctx, const nir_shader *s) +{ + clone_state state; + init_clone_state(&state); + + nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options); + state.ns = ns; + + clone_var_list(&state, &ns->uniforms, &s->uniforms); + clone_var_list(&state, &ns->inputs, &s->inputs); + clone_var_list(&state, &ns->outputs, &s->outputs); + clone_var_list(&state, &ns->globals, &s->globals); + clone_var_list(&state, &ns->system_values, &s->system_values); + + /* Go through and clone functions and overloads */ + foreach_list_typed(nir_function, fxn, node, &s->functions) + clone_function(&state, fxn, ns); + + /* Only after all overloads are cloned can we clone the actual function + * implementations. This is because nir_call_instrs need to reference the + * overloads of other functions and we don't know what order the functions + * will have in the list. + */ + nir_foreach_overload(s, fo) { + nir_function_overload *nfo = lookup_ptr(&state, fo); + clone_function_impl(&state, fo->impl, nfo); + } + + clone_reg_list(&state, &ns->registers, &s->registers); + ns->reg_alloc = s->reg_alloc; + + ns->info = s->info; + ns->info.name = ralloc_strdup(ns, ns->info.name); + if (ns->info.label) + ns->info.label = ralloc_strdup(ns, ns->info.label); + + ns->num_inputs = s->num_inputs; + ns->num_uniforms = s->num_uniforms; + ns->num_outputs = s->num_outputs; + + free_clone_state(&state); + + return ns; +} diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py index 2ba8554645d..b16ef503c92 100644 --- a/src/glsl/nir/nir_constant_expressions.py +++ b/src/glsl/nir/nir_constant_expressions.py @@ -213,7 +213,7 @@ unpack_half_1x16(uint16_t u) } /* Some typed vector structures to make things like src0.y work */ -% for type in ["float", "int", "unsigned", "bool"]: +% for type in ["float", "int", "uint", "bool"]: struct ${type}_vec { ${type} x; ${type} y; diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 0a134aff211..de30db61eea 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -244,6 +244,7 @@ SYSTEM_VALUE(local_invocation_id, 3, 0) SYSTEM_VALUE(work_group_id, 3, 0) SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */ SYSTEM_VALUE(num_work_groups, 3, 0) +SYSTEM_VALUE(helper_invocation, 1, 0) /* * The format of the indices depends on the type of the load.
For uniforms, diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c index 31ccfb2c02b..c58c7785b3f 100644 --- a/src/glsl/nir/nir_lower_clip.c +++ b/src/glsl/nir/nir_lower_clip.c @@ -55,9 +55,11 @@ create_clipdist_var(nir_shader *shader, unsigned drvloc, if (output) { exec_list_push_tail(&shader->outputs, &var->node); + shader->num_outputs++; /* TODO use type_size() */ } else { exec_list_push_tail(&shader->inputs, &var->node); + shader->num_inputs++; /* TODO use type_size() */ } return var; } diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c index c961178c53a..f64b3eac8a0 100644 --- a/src/glsl/nir/nir_lower_idiv.c +++ b/src/glsl/nir/nir_lower_idiv.c @@ -52,10 +52,8 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) bld->cursor = nir_before_instr(&alu->instr); - numer = nir_ssa_for_src(bld, alu->src[0].src, - nir_ssa_alu_instr_src_components(alu, 0)); - denom = nir_ssa_for_src(bld, alu->src[1].src, - nir_ssa_alu_instr_src_components(alu, 1)); + numer = nir_ssa_for_alu_src(bld, alu, 0); + denom = nir_ssa_for_alu_src(bld, alu, 1); if (is_signed) { af = nir_i2f(bld, numer); @@ -96,7 +94,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) r = nir_imul(bld, q, b); r = nir_isub(bld, a, r); - r = nir_ige(bld, r, b); + r = nir_uge(bld, r, b); r = nir_b2i(bld, r); q = nir_iadd(bld, q, r); diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 00a31458310..5683e69d865 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -197,7 +197,7 @@ nir_lower_io_block(nir_block *block, void *void_state) nir_variable_mode mode = intrin->variables[0]->var->data.mode; - if (state->mode != -1 && state->mode != mode) + if (state->mode != nir_var_all && state->mode != mode) continue; if (mode != nir_var_shader_in && diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c index 8aaa48ab568..93ebf8e78a9 100644 --- a/src/glsl/nir/nir_lower_tex.c +++ b/src/glsl/nir/nir_lower_tex.c @@ -41,6 +41,7 @@ typedef struct { nir_builder b; const nir_lower_tex_options *options; + bool progress; } lower_tex_state; static void @@ -133,6 +134,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex) txs->op = nir_texop_txs; txs->sampler_dim = GLSL_SAMPLER_DIM_RECT; txs->sampler_index = tex->sampler_index; + txs->dest_type = nir_type_int; /* only single src, the lod: */ txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0)); @@ -213,6 +215,66 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) } } +static nir_ssa_def * +get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) +{ + nir_const_value v; + + memset(&v, 0, sizeof(v)); + + if (swizzle_val == 4) { + v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0; + } else { + assert(swizzle_val == 5); + if (type == nir_type_float) + v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0; + else + v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1; + } + + return nir_build_imm(b, 4, v); +} + +static void +swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) +{ + assert(tex->dest.is_ssa); + + b->cursor = nir_after_instr(&tex->instr); + + nir_ssa_def *swizzled; + if (tex->op == nir_texop_tg4) { + if (swizzle[tex->component] < 4) { + /* This one's easy */ + tex->component = swizzle[tex->component]; + return; + } else { + swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]); + } + } else { + assert(nir_tex_instr_dest_size(tex) == 4); + if (swizzle[0] < 4 && swizzle[1] < 4 && + swizzle[2] < 4 && swizzle[3] < 4) { + unsigned swiz[4] = { swizzle[0], 
swizzle[1], swizzle[2], swizzle[3] }; + /* We have no 0's or 1's, just emit a swizzling MOV */ + swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false); + } else { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < 4; i++) { + if (swizzle[i] < 4) { + srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]); + } else { + srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]); + } + } + swizzled = nir_vec(b, srcs, 4); + } + } + + nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled), + swizzled->parent_instr); +} + static bool nir_lower_tex_block(nir_block *block, void *void_state) { @@ -239,15 +301,28 @@ nir_lower_tex_block(nir_block *block, void *void_state) /* If we are clamping any coords, we must lower projector first * as clamping happens *after* projection: */ - if (lower_txp || sat_mask) + if (lower_txp || sat_mask) { project_src(b, tex); + state->progress = true; + } if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && - state->options->lower_rect) + state->options->lower_rect) { lower_rect(b, tex); + state->progress = true; + } - if (sat_mask) + if (sat_mask) { saturate_src(b, tex, sat_mask); + state->progress = true; + } + + if (((1 << tex->sampler_index) & state->options->swizzle_result) && + !nir_tex_instr_is_query(tex) && + !(tex->is_shadow && tex->is_new_style_shadow)) { + swizzle_result(b, tex, state->options->swizzles[tex->sampler_index]); + state->progress = true; + } } return true; @@ -264,13 +339,17 @@ nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state) nir_metadata_dominance); } -void +bool nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options) { lower_tex_state state; state.options = options; + state.progress = false; + nir_foreach_overload(shader, overload) { if (overload->impl) nir_lower_tex_impl(overload->impl, &state); } + + return state.progress; } diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c index db519bf513b..6995b9d6bc1 100644 --- a/src/glsl/nir/nir_lower_two_sided_color.c +++ b/src/glsl/nir/nir_lower_two_sided_color.c @@ -60,6 +60,8 @@ create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot) exec_list_push_tail(&shader->inputs, &var->node); + shader->num_inputs++; /* TODO use type_size() */ + return var; } diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c index 6de981f430f..d5324b35a78 100644 --- a/src/glsl/nir/nir_metadata.c +++ b/src/glsl/nir/nir_metadata.c @@ -52,3 +52,39 @@ nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved) { impl->valid_metadata &= preserved; } + +#ifdef DEBUG +/** + * Make sure passes properly invalidate metadata (part 1). + * + * Call this before running a pass to set a bogus metadata flag, which will + * only be preserved if the pass forgets to call nir_metadata_preserve(). + */ +void +nir_metadata_set_validation_flag(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) { + overload->impl->valid_metadata |= nir_metadata_not_properly_reset; + } + } +} + +/** + * Make sure passes properly invalidate metadata (part 2). + * + * Call this after a pass makes progress to verify that the bogus metadata set by + * the earlier function was properly thrown away. Note that passes may not call + * nir_metadata_preserve() if they don't actually make any changes at all. 
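+ *
+ * A debug-build pass loop might use the two helpers like this (a sketch;
+ * nir_opt_some_pass and its progress return are hypothetical):
+ *
+ *    nir_metadata_set_validation_flag(shader);
+ *    if (nir_opt_some_pass(shader))
+ *       nir_metadata_check_validation_flag(shader);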
+ */ +void +nir_metadata_check_validation_flag(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) { + assert(!(overload->impl->valid_metadata & + nir_metadata_not_properly_reset)); + } + } +} +#endif diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index 3c0f1da94af..37d3dfc4588 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -91,7 +91,7 @@ class Opcode(object): tfloat = "float" tint = "int" tbool = "bool" -tunsigned = "unsigned" +tuint = "uint" commutative = "commutative " associative = "associative " @@ -156,7 +156,7 @@ unop("fsqrt", tfloat, "sqrtf(src0)") unop("fexp2", tfloat, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. -unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion +unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. # Float-to-boolean conversion unop_convert("f2b", tfloat, tbool, "src0 != 0.0f") @@ -165,7 +165,7 @@ unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f") # Int-to-boolean conversion unop_convert("i2b", tint, tbool, "src0 != 0") unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion -unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion. +unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion. unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}") unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}") @@ -205,13 +205,13 @@ unop("fddy_coarse", tfloat, "0.0f") # Floating point pack and unpack operations. def pack_2x16(fmt): - unop_horiz("pack_" + fmt + "_2x16", 1, tunsigned, 2, tfloat, """ + unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """ dst.x = (uint32_t) pack_fmt_1x16(src0.x); dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16; """.replace("fmt", fmt)) def pack_4x8(fmt): - unop_horiz("pack_" + fmt + "_4x8", 1, tunsigned, 4, tfloat, """ + unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """ dst.x = (uint32_t) pack_fmt_1x8(src0.x); dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8; dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16; @@ -219,13 +219,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24; """.replace("fmt", fmt)) def unpack_2x16(fmt): - unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tunsigned, """ + unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """ dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff)); dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16)); """.replace("fmt", fmt)) def unpack_4x8(fmt): - unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tunsigned, """ + unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """ dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff)); dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff)); dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff)); @@ -248,22 +248,22 @@ unpack_2x16("half") # Lowered floating point unpacking operations. -unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tunsigned, +unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint, "unpack_half_1x16((uint16_t)(src0.x & 0xffff))") -unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tunsigned, +unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint, "unpack_half_1x16((uint16_t)(src0.x >> 16))") # Bit operations, part of ARB_gpu_shader5. 
-unop("bitfield_reverse", tunsigned, """ +unop("bitfield_reverse", tuint, """ /* we're not winning any awards for speed here, but that's ok */ dst = 0; for (unsigned bit = 0; bit < 32; bit++) dst |= ((src0 >> bit) & 1) << (31 - bit); """) -unop("bit_count", tunsigned, """ +unop("bit_count", tuint, """ dst = 0; for (unsigned bit = 0; bit < 32; bit++) { if ((src0 >> bit) & 1) @@ -271,7 +271,7 @@ for (unsigned bit = 0; bit < 32; bit++) { } """) -unop_convert("ufind_msb", tunsigned, tint, """ +unop_convert("ufind_msb", tuint, tint, """ dst = -1; for (int bit = 31; bit > 0; bit--) { if ((src0 >> bit) & 1) { @@ -358,25 +358,25 @@ binop("imul", tint, commutative + associative, "src0 * src1") binop("imul_high", tint, commutative, "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)") # high 32-bits of unsigned integer multiply -binop("umul_high", tunsigned, commutative, +binop("umul_high", tuint, commutative, "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)") binop("fdiv", tfloat, "", "src0 / src1") binop("idiv", tint, "", "src0 / src1") -binop("udiv", tunsigned, "", "src0 / src1") +binop("udiv", tuint, "", "src0 / src1") # returns a boolean representing the carry resulting from the addition of # the two unsigned arguments. -binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0") +binop_convert("uadd_carry", tbool, tuint, commutative, "src0 + src1 < src0") # returns a boolean representing the borrow resulting from the subtraction # of the two unsigned arguments. -binop_convert("usub_borrow", tbool, tunsigned, "", "src1 < src0") +binop_convert("usub_borrow", tbool, tuint, "", "src1 < src0") binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") -binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1") +binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1") # # Comparisons @@ -393,8 +393,8 @@ binop_compare("ilt", tint, "", "src0 < src1") binop_compare("ige", tint, "", "src0 >= src1") binop_compare("ieq", tint, commutative, "src0 == src1") binop_compare("ine", tint, commutative, "src0 != src1") -binop_compare("ult", tunsigned, "", "src0 < src1") -binop_compare("uge", tunsigned, "", "src0 >= src1") +binop_compare("ult", tuint, "", "src0 < src1") +binop_compare("uge", tuint, "", "src0 >= src1") # integer-aware GLSL-style comparisons that compare floats and ints @@ -425,7 +425,7 @@ binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not E binop("ishl", tint, "", "src0 << src1") binop("ishr", tint, "", "src0 >> src1") -binop("ushr", tunsigned, "", "src0 >> src1") +binop("ushr", tuint, "", "src0 >> src1") # bitwise logic operators # @@ -433,9 +433,9 @@ binop("ushr", tunsigned, "", "src0 >> src1") # integers. -binop("iand", tunsigned, commutative + associative, "src0 & src1") -binop("ior", tunsigned, commutative + associative, "src0 | src1") -binop("ixor", tunsigned, commutative + associative, "src0 ^ src1") +binop("iand", tuint, commutative + associative, "src0 & src1") +binop("ior", tuint, commutative + associative, "src0 | src1") +binop("ixor", tuint, commutative + associative, "src0 ^ src1") # floating point logic operators @@ -463,10 +463,10 @@ opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "", binop("fmin", tfloat, "", "fminf(src0, src1)") binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1") -binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1") +binop("umin", tuint, commutative + associative, "src1 > src0 ? 
src0 : src1") binop("fmax", tfloat, "", "fmaxf(src0, src1)") binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0") -binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0") +binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0") # Saturated vector add for 4 8bit ints. binop("usadd_4x8", tint, commutative + associative, """ @@ -515,10 +515,10 @@ for (int i = 0; i < 32; i += 8) { binop("fpow", tfloat, "", "powf(src0, src1)") -binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat, +binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat, "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)") -binop_convert("bfm", tunsigned, tint, "", """ +binop_convert("bfm", tuint, tint, "", """ int offset = src0, bits = src1; if (offset < 0 || bits < 0 || offset + bits > 32) dst = 0; /* undefined per the spec */ @@ -535,7 +535,7 @@ if (!isnormal(dst)) # Combines the first component of each input to make a 2-component vector. -binop_horiz("vec2", 2, tunsigned, 1, tunsigned, 1, tunsigned, """ +binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """ dst.x = src0.x; dst.y = src1.x; """) @@ -543,9 +543,9 @@ dst.y = src1.x; def triop(name, ty, const_expr): opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr) def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr): - opcode(name, output_size, tunsigned, + opcode(name, output_size, tuint, [src1_size, src2_size, src3_size], - [tunsigned, tunsigned, tunsigned], "", const_expr) + [tuint, tuint, tuint], "", const_expr) triop("ffma", tfloat, "src0 * src1 + src2") @@ -559,11 +559,11 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2") triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2") -opcode("bcsel", 0, tunsigned, [0, 0, 0], - [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2") +opcode("bcsel", 0, tuint, [0, 0, 0], + [tbool, tuint, tuint], "", "src0 ? 
src1 : src2") -triop("bfi", tunsigned, """ -unsigned mask = src0, insert = src1 & mask, base = src2; +triop("bfi", tuint, """ +unsigned mask = src0, insert = src1, base = src2; if (mask == 0) { dst = base; } else { @@ -572,12 +572,12 @@ if (mask == 0) { tmp >>= 1; insert <<= 1; } - dst = (base & ~mask) | insert; + dst = (base & ~mask) | (insert & mask); } """) -opcode("ubitfield_extract", 0, tunsigned, - [0, 1, 1], [tunsigned, tint, tint], "", """ +opcode("ubitfield_extract", 0, tuint, + [0, 1, 1], [tuint, tint, tint], "", """ unsigned base = src0; int offset = src1.x, bits = src2.x; if (bits == 0) { @@ -611,13 +611,13 @@ dst.z = src2.x; def quadop_horiz(name, output_size, src1_size, src2_size, src3_size, src4_size, const_expr): - opcode(name, output_size, tunsigned, + opcode(name, output_size, tuint, [src1_size, src2_size, src3_size, src4_size], - [tunsigned, tunsigned, tunsigned, tunsigned], + [tuint, tuint, tuint, tuint], "", const_expr) -opcode("bitfield_insert", 0, tunsigned, [0, 0, 1, 1], - [tunsigned, tunsigned, tint, tint], "", """ +opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1], + [tuint, tuint, tint, tint], "", """ unsigned base = src0, insert = src1; int offset = src2.x, bits = src3.x; if (bits == 0) { diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c index 7d8bdd7f2ca..cfc8e331128 100644 --- a/src/glsl/nir/nir_opt_copy_propagate.c +++ b/src/glsl/nir/nir_opt_copy_propagate.c @@ -55,10 +55,15 @@ static bool is_move(nir_alu_instr *instr) static bool is_vec(nir_alu_instr *instr) { - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { if (!instr->src[i].src.is_ssa) return false; + /* we handle modifiers in a separate pass */ + if (instr->src[i].abs || instr->src[i].negate) + return false; + } + return instr->op == nir_op_vec2 || instr->op == nir_op_vec3 || instr->op == nir_op_vec4; diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 2db209d434d..76bfc47c2a0 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -512,7 +512,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_texop_texture_samples: fprintf(fp, "texture_samples "); break; - + case nir_texop_samples_identical: + fprintf(fp, "samples_identical "); + break; default: unreachable("Invalid texture operation"); break; @@ -985,6 +987,16 @@ nir_print_shader(nir_shader *shader, FILE *fp) fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage)); + if (shader->info.name) + fprintf(fp, "name: %s\n", shader->info.name); + + if (shader->info.label) + fprintf(fp, "label: %s\n", shader->info.label); + + fprintf(fp, "inputs: %u\n", shader->num_inputs); + fprintf(fp, "outputs: %u\n", shader->num_outputs); + fprintf(fp, "uniforms: %u\n", shader->num_uniforms); + nir_foreach_variable(var, &shader->uniforms) { print_var_decl(var, &state); } diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c index bb154407914..56d7e8162f3 100644 --- a/src/glsl/nir/nir_search.c +++ b/src/glsl/nir/nir_search.c @@ -166,7 +166,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, } return true; case nir_type_int: - case nir_type_unsigned: + case nir_type_uint: case nir_type_bool: for (unsigned i = 0; i < num_components; ++i) { if (load->value.i[new_swizzle[i]] != const_val->data.i) @@ -310,7 +310,7 @@ construct_value(const nir_search_value *value, nir_alu_type type, load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i); 
load->value.i[0] = c->data.i; break; - case nir_type_unsigned: + case nir_type_uint: case nir_type_bool: load->value.u[0] = c->data.u; break; diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c index ed374b921fa..06879d64ee2 100644 --- a/src/glsl/nir/nir_validate.c +++ b/src/glsl/nir/nir_validate.c @@ -290,11 +290,11 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state) { assert(instr->op < nir_num_opcodes); - validate_alu_dest(&instr->dest, state); - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { validate_alu_src(instr, i, state); } + + validate_alu_dest(&instr->dest, state); } static void @@ -375,6 +375,11 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) validate_src(&instr->src[i], state); } + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) { + validate_deref_var(instr, instr->variables[i], state); + } + if (nir_intrinsic_infos[instr->intrinsic].has_dest) { unsigned components_written = nir_intrinsic_infos[instr->intrinsic].dest_components; @@ -392,11 +397,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) validate_dest(&instr->dest, state); } - unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; - for (unsigned i = 0; i < num_vars; i++) { - validate_deref_var(instr, instr->variables[i], state); - } - switch (instr->intrinsic) { case nir_intrinsic_load_var: { const struct glsl_type *type = @@ -434,8 +434,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) static void validate_tex_instr(nir_tex_instr *instr, validate_state *state) { - validate_dest(&instr->dest, state); - bool src_type_seen[nir_num_tex_src_types]; for (unsigned i = 0; i < nir_num_tex_src_types; i++) src_type_seen[i] = false; @@ -448,6 +446,8 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state) if (instr->sampler != NULL) validate_deref_var(instr, instr->sampler, state); + + validate_dest(&instr->dest, state); } static void diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 70610ca0f66..86282d25e0a 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2026,7 +2026,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, switch (glsl_get_sampler_result_type(sampler_type)) { case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; - case GLSL_TYPE_UINT: instr->dest_type = nir_type_unsigned; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; default: unreachable("Invalid base type for sampler result"); diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp index e38a0e93058..cd58213c019 100644 --- a/src/glsl/opt_tree_grafting.cpp +++ b/src/glsl/opt_tree_grafting.cpp @@ -275,6 +275,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir) case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: if (do_graft(&ir->lod_info.bias))
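Taken together, the nir_clone.c additions give NIR a deep-copy entry point: every variable, register, and SSA def is remapped through the clone_state pointer table, so the copy shares no mutable state with the original. A caller might use it as in the sketch below; mem_ctx and the lowering/compile helpers are hypothetical, and only nir_shader_clone() itself comes from this patch.

   /* Snapshot the shader before destructive lowering so a second
    * variant can later be compiled from the pristine copy.
    */
   nir_shader *pristine = nir_shader_clone(mem_ctx, shader);

   lower_for_variant_a(shader);     /* hypothetical driver pass */
   compile_variant(shader);         /* hypothetical backend entry point */

   /* Variant B starts from the untouched clone. */
   lower_for_variant_b(pristine);   /* hypothetical driver pass */
   compile_variant(pristine);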