Diffstat (limited to 'src/glsl')
48 files changed, 2037 insertions, 981 deletions
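Most of this series renames the SSBO-only atomic intrinsics to generic ones (__intrinsic_ssbo_atomic_* becomes __intrinsic_atomic_*) and teaches the frontend to accept compute-shader shared variables as the first argument of the atomic builtins. For reference, a minimal compute shader that exercises both forms; this is illustrative only (not part of the commit) and the block and variable names are invented:

   #version 430
   layout(local_size_x = 64) in;

   layout(std430, binding = 0) buffer Histogram {
      uint bins[];
   };

   /* With this series, verify_first_atomic_parameter() accepts shared
    * variables in addition to buffer variables. */
   shared uint local_count;

   void main()
   {
      if (gl_LocalInvocationIndex == 0u)
         local_count = 0u;
      barrier();

      atomicAdd(local_count, 1u);           /* atomic on a shared variable */
      barrier();

      if (gl_LocalInvocationIndex == 0u)
         atomicAdd(bins[gl_WorkGroupID.x],  /* atomic on a buffer variable */
                   local_count);
   }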
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 0c9fd75d206..e64c31e17c6 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -160,6 +160,8 @@ LIBGLSL_FILES = \ loop_analysis.h \ loop_controls.cpp \ loop_unroll.cpp \ + lower_buffer_access.cpp \ + lower_buffer_access.h \ lower_clip_distance.cpp \ lower_const_arrays_to_uniforms.cpp \ lower_discard.cpp \ @@ -184,6 +186,7 @@ LIBGLSL_FILES = \ lower_vector_insert.cpp \ lower_vertex_id.cpp \ lower_output_reads.cpp \ + lower_shared_reference.cpp \ lower_ubo_reference.cpp \ opt_algebraic.cpp \ opt_array_splitting.cpp \ diff --git a/src/glsl/ast.h b/src/glsl/ast.h index 3bea63ea0ed..adfc7938bff 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -699,16 +699,16 @@ struct ast_type_qualifier { bool merge_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q); + const ast_type_qualifier &q); bool merge_out_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q, + const ast_type_qualifier &q, ast_node* &node); bool merge_in_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q, + const ast_type_qualifier &q, ast_node* &node); ast_subroutine_list *subroutine_list; @@ -1152,7 +1152,7 @@ class ast_cs_input_layout : public ast_node { public: ast_cs_input_layout(const struct YYLTYPE &locp, - ast_layout_expression **local_size) + ast_layout_expression *const *local_size) { for (int i = 0; i < 3; i++) { this->local_size[i] = local_size[i]; @@ -1197,6 +1197,6 @@ check_builtin_array_max_size(const char *name, unsigned size, extern void _mesa_ast_process_interface_block(YYLTYPE *locp, _mesa_glsl_parse_state *state, ast_interface_block *const block, - const struct ast_type_qualifier q); + const struct ast_type_qualifier &q); #endif /* AST_H */ diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 466ece67424..e32a588f091 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -143,19 +143,21 @@ verify_image_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, } static bool -verify_first_atomic_ssbo_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, +verify_first_atomic_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, ir_variable *var) { - if (!var || !var->is_in_shader_storage_block()) { + if (!var || + (!var->is_in_shader_storage_block() && + var->data.mode != ir_var_shader_shared)) { _mesa_glsl_error(loc, state, "First argument to atomic function " - "must be a buffer variable"); + "must be a buffer or shared variable"); return false; } return true; } static bool -is_atomic_ssbo_function(const char *func_name) +is_atomic_function(const char *func_name) { return !strcmp(func_name, "atomicAdd") || !strcmp(func_name, "atomicMin") || @@ -276,16 +278,16 @@ verify_parameter_modes(_mesa_glsl_parse_state *state, /* The first parameter of atomic functions must be a buffer variable */ const char *func_name = sig->function_name(); - bool is_atomic_ssbo = is_atomic_ssbo_function(func_name); - if (is_atomic_ssbo) { + bool is_atomic = is_atomic_function(func_name); + if (is_atomic) { const ir_rvalue *const actual = (ir_rvalue *) actual_ir_parameters.head; const ast_expression *const actual_ast = exec_node_data(ast_expression, actual_ast_parameters.head, link); YYLTYPE loc = actual_ast->get_location(); - if (!verify_first_atomic_ssbo_parameter(&loc, state, - actual->variable_referenced())) { + if (!verify_first_atomic_parameter(&loc, state, + actual->variable_referenced())) { return false; } } @@ -1737,7 +1739,7 
@@ ast_function_expression::handle_method(exec_list *instructions, result = new(ctx) ir_constant(op->type->array_size()); } } else if (op->type->is_vector()) { - if (state->ARB_shading_language_420pack_enable) { + if (state->has_420pack()) { /* .length() returns int. */ result = new(ctx) ir_constant((int) op->type->vector_elements); } else { @@ -1746,7 +1748,7 @@ ast_function_expression::handle_method(exec_list *instructions, goto fail; } } else if (op->type->is_matrix()) { - if (state->ARB_shading_language_420pack_enable) { + if (state->has_420pack()) { /* .length() returns int. */ result = new(ctx) ir_constant((int) op->type->matrix_columns); } else { @@ -2075,7 +2077,7 @@ ast_aggregate_initializer::hir(exec_list *instructions, } const glsl_type *const constructor_type = this->constructor_type; - if (!state->ARB_shading_language_420pack_enable) { + if (!state->has_420pack()) { _mesa_glsl_error(&loc, state, "C-style initialization requires the " "GL_ARB_shading_language_420pack extension"); return ir_rvalue::error_value(ctx); diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 52881a4da7a..fc6bb3e31f1 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -1825,7 +1825,7 @@ ast_expression::do_hir(exec_list *instructions, * tree. This particular use must be at location specified in the grammar * as 'variable_identifier'. */ - ir_variable *var = + ir_variable *var = state->symbols->get_variable(this->primary_expression.identifier); if (var != NULL) { @@ -2650,7 +2650,9 @@ apply_explicit_binding(struct _mesa_glsl_parse_state *state, return; } - } else if (state->is_version(420, 310) && base_type->is_image()) { + } else if ((state->is_version(420, 310) || + state->ARB_shading_language_420pack_enable) && + base_type->is_image()) { assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); if (max_index >= ctx->Const.MaxImageUnits) { _mesa_glsl_error(loc, state, "Image binding %d exceeds the " @@ -3737,7 +3739,7 @@ process_initializer(ir_variable *var, ast_declaration *decl, * expressions. Const-qualified global variables must still be * initialized with constant expressions. */ - if (!state->ARB_shading_language_420pack_enable + if (!state->has_420pack() || state->current_function == NULL) { _mesa_glsl_error(& initializer_loc, state, "initializer of %s variable `%s' must be a " @@ -5366,7 +5368,7 @@ ast_jump_statement::hir(exec_list *instructions, if (state->current_function->return_type != ret_type) { YYLTYPE loc = this->get_location(); - if (state->ARB_shading_language_420pack_enable) { + if (state->has_420pack()) { if (!apply_implicit_conversion(state->current_function->return_type, ret, state)) { _mesa_glsl_error(& loc, state, @@ -5558,8 +5560,8 @@ ast_switch_statement::hir(exec_list *instructions, /* From page 66 (page 55 of the PDF) of the GLSL 1.50 spec: * - * "The type of init-expression in a switch statement must be a - * scalar integer." + * "The type of init-expression in a switch statement must be a + * scalar integer." 
*/ if (!test_expression->type->is_scalar() || !test_expression->type->is_integer()) { diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp index 03ed4dcfa2a..8643b7bfb76 100644 --- a/src/glsl/ast_type.cpp +++ b/src/glsl/ast_type.cpp @@ -116,7 +116,7 @@ ast_type_qualifier::interpolation_string() const bool ast_type_qualifier::merge_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q) + const ast_type_qualifier &q) { ast_type_qualifier ubo_mat_mask; ubo_mat_mask.flags.i = 0; @@ -293,7 +293,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, bool ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q, + const ast_type_qualifier &q, ast_node* &node) { void *mem_ctx = state; @@ -309,7 +309,7 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc, bool ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q, + const ast_type_qualifier &q, ast_node* &node) { void *mem_ctx = state; diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 881ee2b6b55..9973a763087 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -479,6 +479,12 @@ compute_shader(const _mesa_glsl_parse_state *state) } static bool +buffer_atomics_supported(const _mesa_glsl_parse_state *state) +{ + return compute_shader(state) || shader_storage_buffer_object(state); +} + +static bool barrier_supported(const _mesa_glsl_parse_state *state) { return compute_shader(state) || @@ -606,8 +612,8 @@ private: ir_expression_operation opcode, const glsl_type *return_type, const glsl_type *param_type); - ir_function_signature *binop(ir_expression_operation opcode, - builtin_available_predicate avail, + ir_function_signature *binop(builtin_available_predicate avail, + ir_expression_operation opcode, const glsl_type *return_type, const glsl_type *param0_type, const glsl_type *param1_type); @@ -774,16 +780,16 @@ private: ir_function_signature *_atomic_counter_op(const char *intrinsic, builtin_available_predicate avail); - ir_function_signature *_atomic_ssbo_intrinsic2(builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_ssbo_op2(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_ssbo_intrinsic3(builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_ssbo_op3(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type); + ir_function_signature *_atomic_intrinsic2(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_op2(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_intrinsic3(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_op3(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type); B1(min3) B1(max3) @@ -930,53 +936,53 @@ builtin_builder::create_intrinsics() _atomic_counter_intrinsic(shader_atomic_counters), NULL); - add_function("__intrinsic_ssbo_atomic_add", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_min", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - 
_atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_max", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_and", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_or", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_xor", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_exchange", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_comp_swap", - _atomic_ssbo_intrinsic3(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic3(shader_storage_buffer_object, - glsl_type::int_type), + add_function("__intrinsic_atomic_add", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_min", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_max", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_and", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_or", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_xor", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_exchange", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_comp_swap", + _atomic_intrinsic3(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic3(buffer_atomics_supported, + glsl_type::int_type), NULL); add_image_functions(false); @@ -1336,7 +1342,7 @@ builtin_builder::create_builtins() _smoothstep(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), _smoothstep(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), NULL); - + FD130(isnan) FD130(isinf) @@ -1373,7 +1379,7 @@ builtin_builder::create_builtins() FD(distance) FD(dot) - add_function("cross", _cross(always_available, glsl_type::vec3_type), + add_function("cross", _cross(always_available, glsl_type::vec3_type), _cross(fp64, glsl_type::dvec3_type), NULL); FD(normalize) @@ -2682,68 +2688,68 @@ builtin_builder::create_builtins() NULL); 
add_function("atomicAdd", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_add", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_add", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicMin", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_min", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_min", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicMax", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_max", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_max", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicAnd", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_and", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_and", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicOr", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_or", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_or", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicXor", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_xor", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_xor", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicExchange", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_exchange", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_exchange", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicCompSwap", - _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op3("__intrinsic_atomic_comp_swap", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op3("__intrinsic_atomic_comp_swap", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("min3", @@ -3114,8 +3120,8 @@ 
builtin_builder::_##NAME(builtin_available_predicate avail, const glsl_type *typ } ir_function_signature * -builtin_builder::binop(ir_expression_operation opcode, - builtin_available_predicate avail, +builtin_builder::binop(builtin_available_predicate avail, + ir_expression_operation opcode, const glsl_type *return_type, const glsl_type *param0_type, const glsl_type *param1_type) @@ -3411,7 +3417,7 @@ builtin_builder::_atanh(const glsl_type *type) ir_function_signature * builtin_builder::_pow(const glsl_type *type) { - return binop(ir_binop_pow, always_available, type, type, type); + return binop(always_available, ir_binop_pow, type, type, type); } UNOP(exp, ir_unop_exp, always_available) @@ -3435,7 +3441,7 @@ UNOPA(fract, ir_unop_fract) ir_function_signature * builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type) { - return binop(ir_binop_mod, always_available, x_type, x_type, y_type); + return binop(always_available, ir_binop_mod, x_type, x_type, y_type); } ir_function_signature * @@ -3457,14 +3463,14 @@ ir_function_signature * builtin_builder::_min(builtin_available_predicate avail, const glsl_type *x_type, const glsl_type *y_type) { - return binop(ir_binop_min, avail, x_type, x_type, y_type); + return binop(avail, ir_binop_min, x_type, x_type, y_type); } ir_function_signature * builtin_builder::_max(builtin_available_predicate avail, const glsl_type *x_type, const glsl_type *y_type) { - return binop(ir_binop_max, avail, x_type, x_type, y_type); + return binop(avail, ir_binop_max, x_type, x_type, y_type); } ir_function_signature * @@ -3793,9 +3799,9 @@ ir_function_signature * builtin_builder::_dot(builtin_available_predicate avail, const glsl_type *type) { if (type->vector_elements == 1) - return binop(ir_binop_mul, avail, type, type, type); + return binop(avail, ir_binop_mul, type, type, type); - return binop(ir_binop_dot, avail, + return binop(avail, ir_binop_dot, type->get_base_type(), type, type); } @@ -4311,7 +4317,7 @@ ir_function_signature * builtin_builder::_lessThan(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_less, avail, + return binop(avail, ir_binop_less, glsl_type::bvec(type->vector_elements), type, type); } @@ -4319,7 +4325,7 @@ ir_function_signature * builtin_builder::_lessThanEqual(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_lequal, avail, + return binop(avail, ir_binop_lequal, glsl_type::bvec(type->vector_elements), type, type); } @@ -4327,7 +4333,7 @@ ir_function_signature * builtin_builder::_greaterThan(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_greater, avail, + return binop(avail, ir_binop_greater, glsl_type::bvec(type->vector_elements), type, type); } @@ -4335,7 +4341,7 @@ ir_function_signature * builtin_builder::_greaterThanEqual(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_gequal, avail, + return binop(avail, ir_binop_gequal, glsl_type::bvec(type->vector_elements), type, type); } @@ -4343,7 +4349,7 @@ ir_function_signature * builtin_builder::_equal(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_equal, avail, + return binop(avail, ir_binop_equal, glsl_type::bvec(type->vector_elements), type, type); } @@ -4351,7 +4357,7 @@ ir_function_signature * builtin_builder::_notEqual(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_nequal, avail, + return binop(avail, ir_binop_nequal, glsl_type::bvec(type->vector_elements), type, 
type); } @@ -4939,7 +4945,8 @@ builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type) ir_function_signature * builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type) { - return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5_or_es31, x_type, x_type, exp_type); + return binop(x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5_or_es31, + ir_binop_ldexp, x_type, x_type, exp_type); } ir_function_signature * @@ -5096,8 +5103,8 @@ builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail) } ir_function_signature * -builtin_builder::_atomic_ssbo_intrinsic2(builtin_available_predicate avail, - const glsl_type *type) +builtin_builder::_atomic_intrinsic2(builtin_available_predicate avail, + const glsl_type *type) { ir_variable *atomic = in_var(type, "atomic"); ir_variable *data = in_var(type, "data"); @@ -5106,8 +5113,8 @@ builtin_builder::_atomic_ssbo_intrinsic2(builtin_available_predicate avail, } ir_function_signature * -builtin_builder::_atomic_ssbo_intrinsic3(builtin_available_predicate avail, - const glsl_type *type) +builtin_builder::_atomic_intrinsic3(builtin_available_predicate avail, + const glsl_type *type) { ir_variable *atomic = in_var(type, "atomic"); ir_variable *data1 = in_var(type, "data1"); @@ -5131,9 +5138,9 @@ builtin_builder::_atomic_counter_op(const char *intrinsic, } ir_function_signature * -builtin_builder::_atomic_ssbo_op2(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type) +builtin_builder::_atomic_op2(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type) { ir_variable *atomic = in_var(type, "atomic_var"); ir_variable *data = in_var(type, "atomic_data"); @@ -5147,9 +5154,9 @@ builtin_builder::_atomic_ssbo_op2(const char *intrinsic, } ir_function_signature * -builtin_builder::_atomic_ssbo_op3(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type) +builtin_builder::_atomic_op3(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type) { ir_variable *atomic = in_var(type, "atomic_var"); ir_variable *data1 = in_var(type, "atomic_data1"); diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 5a8f98019d1..7eb383ac60c 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -948,7 +948,7 @@ parameter_qualifier: if (($1.flags.q.in || $1.flags.q.out) && ($2.flags.q.in || $2.flags.q.out)) _mesa_glsl_error(&@1, state, "duplicate in/out/inout qualifier"); - if (!state->has_420pack() && $2.flags.q.constant) + if (!state->has_420pack_or_es31() && $2.flags.q.constant) _mesa_glsl_error(&@1, state, "in/out/inout must come after const " "or precise"); @@ -960,7 +960,7 @@ parameter_qualifier: if ($2.precision != ast_precision_none) _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); - if (!(state->has_420pack() || state->is_version(420, 310)) && + if (!state->has_420pack_or_es31() && $2.flags.i != 0) _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); @@ -1482,7 +1482,7 @@ layout_qualifier_id: $$.index = $3; } - if ((state->has_420pack() || + if ((state->has_420pack_or_es31() || state->has_atomic_counters() || state->has_shader_storage_buffer_objects()) && match_layout_qualifier("binding", $1, state) == 0) { @@ -1714,7 +1714,7 @@ type_qualifier: if ($2.flags.q.invariant) _mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier"); - if (!state->has_420pack() && $2.flags.q.precise) + if 
(!state->has_420pack_or_es31() && $2.flags.q.precise) _mesa_glsl_error(&@1, state, "\"invariant\" must come after \"precise\""); @@ -1747,7 +1747,7 @@ type_qualifier: if ($2.has_interpolation()) _mesa_glsl_error(&@1, state, "duplicate interpolation qualifier"); - if (!state->has_420pack() && + if (!state->has_420pack_or_es31() && ($2.flags.q.precise || $2.flags.q.invariant)) { _mesa_glsl_error(&@1, state, "interpolation qualifiers must come " "after \"precise\" or \"invariant\""); @@ -1767,7 +1767,7 @@ type_qualifier: * precise qualifiers since these are useful in ARB_separate_shader_objects. * There is no clear spec guidance on this either. */ - if (!state->has_420pack() && $2.has_layout()) + if (!state->has_420pack_or_es31() && $2.has_layout()) _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); $$ = $1; @@ -1785,7 +1785,7 @@ type_qualifier: "duplicate auxiliary storage qualifier (centroid or sample)"); } - if (!state->has_420pack() && + if (!state->has_420pack_or_es31() && ($2.flags.q.precise || $2.flags.q.invariant || $2.has_interpolation() || $2.has_layout())) { _mesa_glsl_error(&@1, state, "auxiliary storage qualifiers must come " @@ -1803,7 +1803,7 @@ type_qualifier: if ($2.has_storage()) _mesa_glsl_error(&@1, state, "duplicate storage qualifier"); - if (!state->has_420pack() && + if (!state->has_420pack_or_es31() && ($2.flags.q.precise || $2.flags.q.invariant || $2.has_interpolation() || $2.has_layout() || $2.has_auxiliary_storage())) { _mesa_glsl_error(&@1, state, "storage qualifiers must come after " @@ -1819,7 +1819,7 @@ type_qualifier: if ($2.precision != ast_precision_none) _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); - if (!(state->has_420pack() || state->is_version(420, 310)) && + if (!(state->has_420pack_or_es31()) && $2.flags.i != 0) _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); @@ -2575,7 +2575,7 @@ interface_block: { ast_interface_block *block = (ast_interface_block *) $2; - if (!state->has_420pack() && block->layout.has_layout() && + if (!state->has_420pack_or_es31() && block->layout.has_layout() && !block->layout.is_default_qualifier) { _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); YYERROR; diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index b41b64af2c1..3988376ea9d 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -479,7 +479,7 @@ _mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state, struct gl_context *ctx = state->ctx; /* Report the error via GL_ARB_debug_output. 
*/ - _mesa_shader_debug(ctx, type, &msg_id, msg, strlen(msg)); + _mesa_shader_debug(ctx, type, &msg_id, msg); ralloc_strcat(&state->info_log, "\n"); } @@ -876,7 +876,7 @@ void _mesa_ast_process_interface_block(YYLTYPE *locp, _mesa_glsl_parse_state *state, ast_interface_block *const block, - const struct ast_type_qualifier q) + const struct ast_type_qualifier &q) { if (q.flags.q.buffer) { if (!state->has_shader_storage_buffer_objects()) { @@ -1088,7 +1088,7 @@ void ast_compound_statement::print(void) const { printf("{\n"); - + foreach_list_typed(ast_node, ast, link, &this->statements) { ast->print(); } @@ -1414,7 +1414,6 @@ ast_selection_statement::print(void) const printf("else "); else_statement->print(); } - } diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 17ff0b5af79..a4bda772a0f 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -97,7 +97,7 @@ struct _mesa_glsl_parse_state { * supports the feature. * * \param required_glsl_es_version is the GLSL ES version that is required - * to support the feature, or 0 if no version of GLSL ES suports the + * to support the feature, or 0 if no version of GLSL ES supports the * feature. */ bool is_version(unsigned required_glsl_version, @@ -255,6 +255,11 @@ struct _mesa_glsl_parse_state { return ARB_shading_language_420pack_enable || is_version(420, 0); } + bool has_420pack_or_es31() const + { + return ARB_shading_language_420pack_enable || is_version(420, 310); + } + bool has_compute_shader() const { return ARB_compute_shader_enable || is_version(430, 310); diff --git a/src/glsl/hir_field_selection.cpp b/src/glsl/hir_field_selection.cpp index 337095b95b8..92bb4139194 100644 --- a/src/glsl/hir_field_selection.cpp +++ b/src/glsl/hir_field_selection.cpp @@ -57,8 +57,7 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr, expr->primary_expression.identifier); } } else if (op->type->is_vector() || - (state->ARB_shading_language_420pack_enable && - op->type->is_scalar())) { + (state->has_420pack() && op->type->is_scalar())) { ir_swizzle *swiz = ir_swizzle::create(op, expr->primary_expression.identifier, op->type->vector_elements); diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index ca520f547a1..f989e9b6dff 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -1669,6 +1669,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name, this->data.pixel_center_integer = false; this->data.depth_layout = ir_depth_layout_none; this->data.used = false; + this->data.always_active_io = false; this->data.read_only = false; this->data.centroid = false; this->data.sample = false; diff --git a/src/glsl/ir.h b/src/glsl/ir.h index e1109eec1d3..bdc932ef538 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -659,6 +659,13 @@ public: unsigned assigned:1; /** + * When separate shader programs are enabled, only input/outputs between + * the stages of a multi-stage separate program can be safely removed + * from the shader interface. Other inputs/outputs must remain active. + */ + unsigned always_active_io:1; + + /** * Enum indicating how the variable was declared. See * ir_var_declaration_type.
* diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index 67ed3605a8c..ef705851613 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -41,14 +41,6 @@ #include "glsl_types.h" #include "program/hash_table.h" -#if defined(__SUNPRO_CC) && !defined(isnormal) -#include <ieeefp.h> -static int isnormal(double x) -{ - return fpclass(x) == FP_NORMAL; -} -#endif - static float dot_f(ir_constant *op0, ir_constant *op1) { diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 2fee81c09c2..dabd80a8d0d 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -124,6 +124,7 @@ bool lower_const_arrays_to_uniforms(exec_list *instructions); bool lower_clip_distance(gl_shader *shader); void lower_output_reads(unsigned stage, exec_list *instructions); bool lower_packing_builtins(exec_list *instructions, int op_mask); +void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size); void lower_ubo_reference(struct gl_shader *shader); void lower_packed_varyings(void *mem_ctx, unsigned locations_used, ir_variable_mode mode, diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp index 07720e28749..7c0af1b712f 100644 --- a/src/glsl/ir_reader.cpp +++ b/src/glsl/ir_reader.cpp @@ -93,7 +93,7 @@ ir_reader::read(exec_list *instructions, const char *src, bool scan_for_protos) ir_read_error(NULL, "couldn't parse S-Expression."); return; } - + if (scan_for_protos) { scan_for_prototypes(instructions, expr); if (state->error) @@ -147,7 +147,7 @@ ir_reader::read_type(s_expression *expr) return glsl_type::get_array_instance(base_type, s_size->value()); } - + s_symbol *type_sym = SX_AS_SYMBOL(expr); if (type_sym == NULL) { ir_read_error(expr, "expected <type>"); diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index c0b4b3e820c..71750d1b42b 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -766,7 +766,7 @@ public: gl_shader_stage consumer_stage); ~varying_matches(); void record(ir_variable *producer_var, ir_variable *consumer_var); - unsigned assign_locations(uint64_t reserved_slots); + unsigned assign_locations(uint64_t reserved_slots, bool separate_shader); void store_locations() const; private: @@ -896,8 +896,10 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) { assert(producer_var != NULL || consumer_var != NULL); - if ((producer_var && !producer_var->data.is_unmatched_generic_inout) - || (consumer_var && !consumer_var->data.is_unmatched_generic_inout)) { + if ((producer_var && (!producer_var->data.is_unmatched_generic_inout || + producer_var->data.explicit_location)) || + (consumer_var && (!consumer_var->data.is_unmatched_generic_inout || + consumer_var->data.explicit_location))) { /* Either a location already exists for this variable (since it is part * of fixed functionality), or it has already been recorded as part of a * previous match. @@ -986,11 +988,36 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) * passed to varying_matches::record(). */ unsigned -varying_matches::assign_locations(uint64_t reserved_slots) +varying_matches::assign_locations(uint64_t reserved_slots, bool separate_shader) { - /* Sort varying matches into an order that makes them easy to pack. 
*/ - qsort(this->matches, this->num_matches, sizeof(*this->matches), - &varying_matches::match_comparator); + /* We disable varying sorting for separate shader programs for the + * following reasons: + * + * 1/ All programs must sort the varyings in the same order to guarantee + * the interfaces match. However, varying_matches::record() will change + * the interpolation qualifier of some stages. + * + * 2/ GLSL version 4.50 removes the matching constraint on the interpolation + * qualifier. + * + * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec: + * + * "The type and presence of interpolation qualifiers of variables with + * the same name declared in all linked shaders for the same cross-stage + * interface must match, otherwise the link command will fail. + * + * When comparing an output from one stage to an input of a subsequent + * stage, the input and output don't match if their interpolation + * qualifiers (or lack thereof) are not the same." + * + * "It is a link-time error if, within the same stage, the interpolation + * qualifiers of variables of the same name do not match." + */ + if (!separate_shader) { + /* Sort varying matches into an order that makes them easy to pack. */ + qsort(this->matches, this->num_matches, sizeof(*this->matches), + &varying_matches::match_comparator); + } unsigned generic_location = 0; unsigned generic_patch_location = MAX_VARYING*4; @@ -1590,7 +1617,8 @@ assign_varying_locations(struct gl_context *ctx, reserved_varying_slot(producer, ir_var_shader_out) | reserved_varying_slot(consumer, ir_var_shader_in); - const unsigned slots_used = matches.assign_locations(reserved_slots); + const unsigned slots_used = matches.assign_locations(reserved_slots, + prog->SeparateShader); matches.store_locations(); for (unsigned i = 0; i < num_tfeedback_decls; ++i) { diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 331d9a28007..a87bbb2b994 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -631,20 +631,12 @@ link_invalidate_variable_locations(exec_list *ir) /* ir_variable::is_unmatched_generic_inout is used by the linker while * connecting outputs from one stage to inputs of the next stage. - * - * There are two implicit assumptions here. First, we assume that any - * built-in variable (i.e., non-generic in or out) will have - * explicit_location set. Second, we assume that any generic in or out - * will not have explicit_location set. - * - * This second assumption will only be valid until - * GL_ARB_separate_shader_objects is supported. When that extension is - * implemented, this function will need some modifications. */ - if (!var->data.explicit_location) { - var->data.is_unmatched_generic_inout = 1; - } else { + if (var->data.explicit_location && + var->data.location < VARYING_SLOT_VAR0) { var->data.is_unmatched_generic_inout = 0; + } else { + var->data.is_unmatched_generic_inout = 1; } } } @@ -2421,6 +2413,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog, continue; if (var->data.explicit_location) { + var->data.is_unmatched_generic_inout = 0; if ((var->data.location >= (int)(max_index + generic_base)) || (var->data.location < 0)) { linker_error(prog, @@ -2690,6 +2683,53 @@ assign_attribute_or_color_locations(gl_shader_program *prog, return true; } +/** + * Match explicit locations of outputs to inputs and deactivate the + * unmatched flag if found so we don't optimise them away.
+ */ +static void +match_explicit_outputs_to_inputs(struct gl_shader_program *prog, + gl_shader *producer, + gl_shader *consumer) +{ + glsl_symbol_table parameters; + ir_variable *explicit_locations[MAX_VARYING] = { NULL }; + + /* Find all shader outputs in the "producer" stage. + */ + foreach_in_list(ir_instruction, node, producer->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_shader_out)) + continue; + + if (var->data.explicit_location && + var->data.location >= VARYING_SLOT_VAR0) { + const unsigned idx = var->data.location - VARYING_SLOT_VAR0; + if (explicit_locations[idx] == NULL) + explicit_locations[idx] = var; + } + } + + /* Match inputs to outputs */ + foreach_in_list(ir_instruction, node, consumer->ir) { + ir_variable *const input = node->as_variable(); + + if ((input == NULL) || (input->data.mode != ir_var_shader_in)) + continue; + + ir_variable *output = NULL; + if (input->data.explicit_location + && input->data.location >= VARYING_SLOT_VAR0) { + output = explicit_locations[input->data.location - VARYING_SLOT_VAR0]; + + if (output != NULL) { + input->data.is_unmatched_generic_inout = 0; + output->data.is_unmatched_generic_inout = 0; + } + } + } +} /** * Demote shader inputs and outputs that are not used in other stages @@ -3940,6 +3980,77 @@ split_ubos_and_ssbos(void *mem_ctx, assert(*num_ubos + *num_ssbos == num_blocks); } +static void +set_always_active_io(exec_list *ir, ir_variable_mode io_mode) +{ + assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); + + foreach_in_list(ir_instruction, node, ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != io_mode) + continue; + + /* Don't set always active on builtins that haven't been redeclared */ + if (var->data.how_declared == ir_var_declared_implicitly) + continue; + + var->data.always_active_io = true; + } +} + +/** + * When separate shader programs are enabled, only input/outputs between + * the stages of a multi-stage separate program can be safely removed + * from the shader interface. Other inputs/outputs must remain active. + */ +static void +disable_varying_optimizations_for_sso(struct gl_shader_program *prog) +{ + unsigned first, last; + assert(prog->SeparateShader); + + first = MESA_SHADER_STAGES; + last = 0; + + /* Determine first and last stage, excluding the compute stage. */ + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + if (!prog->_LinkedShaders[i]) + continue; + if (first == MESA_SHADER_STAGES) + first = i; + last = i; + } + + if (first == MESA_SHADER_STAGES) + return; + + for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) { + gl_shader *sh = prog->_LinkedShaders[stage]; + if (!sh) + continue; + + if (first == last) { + /* For a single shader program only allow inputs to the vertex shader + * and outputs from the fragment shader to be removed. + */ + if (stage != MESA_SHADER_VERTEX) + set_always_active_io(sh->ir, ir_var_shader_in); + if (stage != MESA_SHADER_FRAGMENT) + set_always_active_io(sh->ir, ir_var_shader_out); + } else { + /* For multi-stage separate shader programs only allow inputs and + * outputs between the shader stages to be removed as well as inputs + * to the vertex shader and outputs from the fragment shader.
+ */ + if (stage == first && stage != MESA_SHADER_VERTEX) + set_always_active_io(sh->ir, ir_var_shader_in); + else if (stage == last && stage != MESA_SHADER_FRAGMENT) + set_always_active_io(sh->ir, ir_var_shader_out); + } + } +} + void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) { @@ -4139,11 +4250,18 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (!prog->LinkStatus) goto done; - unsigned prev; + unsigned first, last, prev; - for (prev = 0; prev <= MESA_SHADER_FRAGMENT; prev++) { - if (prog->_LinkedShaders[prev] != NULL) - break; + first = MESA_SHADER_STAGES; + last = 0; + + /* Determine first and last stage. */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (!prog->_LinkedShaders[i]) + continue; + if (first == MESA_SHADER_STAGES) + first = i; + last = i; } check_explicit_uniform_locations(ctx, prog); @@ -4157,6 +4275,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) /* Validate the inputs of each stage with the output of the preceding * stage. */ + prev = first; for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) { if (prog->_LinkedShaders[i] == NULL) continue; @@ -4199,6 +4318,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) } } + if (prog->SeparateShader) + disable_varying_optimizations_for_sso(prog); + if (!interstage_cross_validate_uniform_blocks(prog)) goto done; @@ -4250,6 +4372,16 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) } } + prev = first; + for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + match_explicit_outputs_to_inputs(prog, prog->_LinkedShaders[prev], + prog->_LinkedShaders[i]); + prev = i; + } + if (!assign_attribute_or_color_locations(prog, &ctx->Const, MESA_SHADER_VERTEX)) { goto done; @@ -4260,20 +4392,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) goto done; } - unsigned first, last; - - first = MESA_SHADER_STAGES; - last = 0; - - /* Determine first and last stage. */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (!prog->_LinkedShaders[i]) - continue; - if (first == MESA_SHADER_STAGES) - first = i; - last = i; - } - if (num_tfeedback_decls != 0) { /* From GL_EXT_transform_feedback: * A program will fail to link if: @@ -4333,13 +4451,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls, tfeedback_decls); - if (!prog->SeparateShader) + if (!prog->SeparateShader) { demote_shader_inputs_and_outputs(sh, ir_var_shader_out); - - /* Eliminate code that is now dead due to unused outputs being demoted. - */ - while (do_dead_code(sh->ir, false)) - ; + /* Eliminate code that is now dead due to unused outputs being + * demoted. + */ + while (do_dead_code(sh->ir, false)) + ; + } } else if (first == MESA_SHADER_FRAGMENT) { /* If the program only contains a fragment shader... @@ -4356,11 +4475,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) 0 /* num_tfeedback_decls */, NULL /* tfeedback_decls */)) goto done; - } else + } else { demote_shader_inputs_and_outputs(sh, ir_var_shader_in); - - while (do_dead_code(sh->ir, false)) - ; + /* Eliminate code that is now dead due to unused inputs being + * demoted. 
+ */ + while (do_dead_code(sh->ir, false)) + ; + } } next = last; @@ -4485,6 +4607,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks) lower_ubo_reference(prog->_LinkedShaders[i]); + if (ctx->Const.ShaderCompilerOptions[i].LowerShaderSharedVariables) + lower_shared_reference(prog->_LinkedShaders[i], + &prog->Comp.SharedSize); + lower_vector_derefs(prog->_LinkedShaders[i]); } diff --git a/src/glsl/list.h b/src/glsl/list.h index 15fcd4abd1c..a1c4d82b017 100644 --- a/src/glsl/list.h +++ b/src/glsl/list.h @@ -688,7 +688,7 @@ inline void exec_node::insert_before(exec_list *before) __node = __next, __next = \ exec_node_data(__type, (__next)->__field.next, __field)) -#define foreach_list_typed_safe_reverse(__type, __node, __field, __list) \ +#define foreach_list_typed_reverse_safe(__type, __node, __field, __list) \ for (__type * __node = \ exec_node_data(__type, (__list)->tail_pred, __field), \ * __prev = \ diff --git a/src/glsl/lower_buffer_access.cpp b/src/glsl/lower_buffer_access.cpp new file mode 100644 index 00000000000..f8c8d140ea8 --- /dev/null +++ b/src/glsl/lower_buffer_access.cpp @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_buffer_access.cpp + * + * Helper for IR lowering pass to replace dereferences of buffer object based + * shader variables with intrinsic function calls. + * + * This helper is used by lowering passes for UBOs, SSBOs and compute shader + * shared variables. + */ + +#include "lower_buffer_access.h" +#include "ir_builder.h" +#include "main/macros.h" +#include "util/list.h" +#include "glsl_parser_extras.h" + +using namespace ir_builder; + +namespace lower_buffer_access { + +static inline int +writemask_for_size(unsigned n) +{ + return ((1 << n) - 1); +} + +/** + * Takes a deref and recursively calls itself to break the deref down to the + * point that the reads or writes generated are contiguous scalars or vectors. 
+ */ +void +lower_buffer_access::emit_access(void *mem_ctx, + bool is_write, + ir_dereference *deref, + ir_variable *base_offset, + unsigned int deref_offset, + bool row_major, + int matrix_columns, + unsigned int packing, + unsigned int write_mask) +{ + if (deref->type->is_record()) { + unsigned int field_offset = 0; + + for (unsigned i = 0; i < deref->type->length; i++) { + const struct glsl_struct_field *field = + &deref->type->fields.structure[i]; + ir_dereference *field_deref = + new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), + field->name); + + field_offset = + glsl_align(field_offset, + field->type->std140_base_alignment(row_major)); + + emit_access(mem_ctx, is_write, field_deref, base_offset, + deref_offset + field_offset, + row_major, 1, packing, + writemask_for_size(field_deref->type->vector_elements)); + + field_offset += field->type->std140_size(row_major); + } + return; + } + + if (deref->type->is_array()) { + unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ? + deref->type->fields.array->std430_array_stride(row_major) : + glsl_align(deref->type->fields.array->std140_size(row_major), 16); + + for (unsigned i = 0; i < deref->type->length; i++) { + ir_constant *element = new(mem_ctx) ir_constant(i); + ir_dereference *element_deref = + new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), + element); + emit_access(mem_ctx, is_write, element_deref, base_offset, + deref_offset + i * array_stride, + row_major, 1, packing, + writemask_for_size(element_deref->type->vector_elements)); + } + return; + } + + if (deref->type->is_matrix()) { + for (unsigned i = 0; i < deref->type->matrix_columns; i++) { + ir_constant *col = new(mem_ctx) ir_constant(i); + ir_dereference *col_deref = + new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col); + + if (row_major) { + /* For a row-major matrix, the next column starts at the next + * element. + */ + int size_mul = deref->type->is_double() ? 8 : 4; + emit_access(mem_ctx, is_write, col_deref, base_offset, + deref_offset + i * size_mul, + row_major, deref->type->matrix_columns, packing, + writemask_for_size(col_deref->type->vector_elements)); + } else { + int size_mul; + + /* std430 doesn't round up vec2 size to a vec4 size */ + if (packing == GLSL_INTERFACE_PACKING_STD430 && + deref->type->vector_elements == 2 && + !deref->type->is_double()) { + size_mul = 8; + } else { + /* std140 always rounds the stride of arrays (and matrices) to a + * vec4, so matrices are always 16 between columns/rows. With + * doubles, they will be 32 apart when there are more than 2 rows. + * + * For both std140 and std430, if the member is a + * three-component vector with components consuming N basic + * machine units, the base alignment is 4N. For vec4, base + * alignment is 4N. + */ + size_mul = (deref->type->is_double() && + deref->type->vector_elements > 2) ? 32 : 16; + } + + emit_access(mem_ctx, is_write, col_deref, base_offset, + deref_offset + i * size_mul, + row_major, deref->type->matrix_columns, packing, + writemask_for_size(col_deref->type->vector_elements)); + } + } + return; + } + + assert(deref->type->is_scalar() || deref->type->is_vector()); + + if (!row_major) { + ir_rvalue *offset = + add(base_offset, new(mem_ctx) ir_constant(deref_offset)); + unsigned mask = + is_write ? write_mask : (1 << deref->type->vector_elements) - 1; + insert_buffer_access(mem_ctx, deref, deref->type, offset, mask, -1); + } else { + unsigned N = deref->type->is_double() ?
8 : 4; + + /* We're dereffing a column out of a row-major matrix, so we + * gather the vector from each stored row. + */ + assert(deref->type->base_type == GLSL_TYPE_FLOAT || + deref->type->base_type == GLSL_TYPE_DOUBLE); + /* Matrices, row_major or not, are stored as if they were + * arrays of vectors of the appropriate size in std140. + * Arrays have their strides rounded up to a vec4, so the + * matrix stride is always 16. However, a double matrix may either be 16 + * or 32 depending on the number of columns. + */ + assert(matrix_columns <= 4); + unsigned matrix_stride = 0; + /* The matrix stride for std430 mat2xY matrices is not rounded up to + * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform + * Block Layout": + * + * "2. If the member is a two- or four-component vector with components + * consuming N basic machine units, the base alignment is 2N or 4N, + * respectively." [...] + * "4. If the member is an array of scalars or vectors, the base alignment + * and array stride are set to match the base alignment of a single array + * element, according to rules (1), (2), and (3), and rounded up to the + * base alignment of a vec4." [...] + * "7. If the member is a row-major matrix with C columns and R rows, the + * matrix is stored identically to an array of R row vectors with C + * components each, according to rule (4)." [...] + * "When using the std430 storage layout, shader storage blocks will be + * laid out in buffer storage identically to uniform and shader storage + * blocks using the std140 layout, except that the base alignment and + * stride of arrays of scalars and vectors in rule 4 and of structures in + * rule 9 are not rounded up a multiple of the base alignment of a vec4." + */ + if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2) + matrix_stride = 2 * N; + else + matrix_stride = glsl_align(matrix_columns * N, 16); + + const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ? + glsl_type::float_type : glsl_type::double_type; + + for (unsigned i = 0; i < deref->type->vector_elements; i++) { + ir_rvalue *chan_offset = + add(base_offset, + new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); + if (!is_write || ((1U << i) & write_mask)) + insert_buffer_access(mem_ctx, deref, deref_type, chan_offset, + (1U << i), i); + } + } +} + +/** + * Determine if a thing being dereferenced is row-major + * + * There is some trickery here. + * + * If the thing being dereferenced is a member of uniform block \b without an + * instance name, then the name of the \c ir_variable is the field name of an + * interface type. If this field is row-major, then the thing referenced is + * row-major. + * + * If the thing being dereferenced is a member of uniform block \b with an + * instance name, then the last dereference in the tree will be an + * \c ir_dereference_record. If that record field is row-major, then the + * thing referenced is row-major.
+ */ +bool +lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref) +{ + bool matrix = false; + const ir_rvalue *ir = deref; + + while (true) { + matrix = matrix || ir->type->without_array()->is_matrix(); + + switch (ir->ir_type) { + case ir_type_dereference_array: { + const ir_dereference_array *const array_deref = + (const ir_dereference_array *) ir; + + ir = array_deref->array; + break; + } + + case ir_type_dereference_record: { + const ir_dereference_record *const record_deref = + (const ir_dereference_record *) ir; + + ir = record_deref->record; + + const int idx = ir->type->field_index(record_deref->field); + assert(idx >= 0); + + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout); + + switch (matrix_layout) { + case GLSL_MATRIX_LAYOUT_INHERITED: + break; + case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: + return false; + case GLSL_MATRIX_LAYOUT_ROW_MAJOR: + return matrix || deref->type->without_array()->is_record(); + } + + break; + } + + case ir_type_dereference_variable: { + const ir_dereference_variable *const var_deref = + (const ir_dereference_variable *) ir; + + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(var_deref->var->data.matrix_layout); + + switch (matrix_layout) { + case GLSL_MATRIX_LAYOUT_INHERITED: { + /* For interface block matrix variables we handle inherited + * layouts at HIR generation time, but we don't do that for shared + * variables, which are always column-major + */ + ir_variable *var = deref->variable_referenced(); + assert((var->is_in_buffer_block() && !matrix) || + var->data.mode == ir_var_shader_shared); + return false; + } + case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: + return false; + case GLSL_MATRIX_LAYOUT_ROW_MAJOR: + return matrix || deref->type->without_array()->is_record(); + } + + unreachable("invalid matrix layout"); + break; + } + + default: + return false; + } + } + + /* The tree must have ended with a dereference that wasn't an + * ir_dereference_variable. That is invalid, and it should be impossible. + */ + unreachable("invalid dereference tree"); + return false; +} + +/** + * This function initializes various values that will be used later by + * emit_access when actually emitting loads or stores. + * + * Note: const_offset is an input as well as an output, clients must + * initialize it to the offset of the variable in the underlying block, and + * this function will adjust it by adding the constant offset of the member + * being accessed into that variable. + */ +void +lower_buffer_access::setup_buffer_access(void *mem_ctx, + ir_variable *var, + ir_rvalue *deref, + ir_rvalue **offset, + unsigned *const_offset, + bool *row_major, + int *matrix_columns, + unsigned packing) +{ + *offset = new(mem_ctx) ir_constant(0u); + *row_major = is_dereferenced_thing_row_major(deref); + *matrix_columns = 1; + + /* Calculate the offset to the start of the region of the UBO + * dereferenced by *rvalue. This may be a variable offset if an + * array dereference has a variable index. + */ + while (deref) { + switch (deref->ir_type) { + case ir_type_dereference_variable: { + deref = NULL; + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *deref_array = (ir_dereference_array *) deref; + unsigned array_stride; + if (deref_array->array->type->is_vector()) { + /* We get this when storing or loading a component out of a vector + * with a non-constant index. This happens for v[i] = f where v is + * a vector (or m[i][j] = f where m is a matrix). 
If we don't + * lower that here, it gets turned into v = vector_insert(v, i, + * f), which loads the entire vector, modifies one component and + * then writes the entire thing back. That breaks if another + * thread or SIMD channel is modifying the same vector. + */ + array_stride = 4; + if (deref_array->array->type->is_double()) + array_stride *= 2; + } else if (deref_array->array->type->is_matrix() && *row_major) { + /* When loading a vector out of a row major matrix, the + * step between the columns (vectors) is the size of a + * float, while the step between the rows (elements of a + * vector) is handled below in emit_ubo_loads. + */ + array_stride = 4; + if (deref_array->array->type->is_double()) + array_stride *= 2; + *matrix_columns = deref_array->array->type->matrix_columns; + } else if (deref_array->type->without_array()->is_interface()) { + /* We're processing an array dereference of an interface instance + * array. The thing being dereferenced *must* be a variable + * dereference because interfaces cannot be embedded in other + * types. In terms of calculating the offsets for the lowering + * pass, we don't care about the array index. All elements of an + * interface instance array will have the same offsets relative to + * the base of the block that backs them. + */ + deref = deref_array->array->as_dereference(); + break; + } else { + /* Whether or not the field is row-major (because it might be a + * bvec2 or something) does not affect the array itself. We need + * to know whether an array element in its entirety is row-major. + */ + const bool array_row_major = + is_dereferenced_thing_row_major(deref_array); + + /* The array type will give the correct interface packing + * information + */ + if (packing == GLSL_INTERFACE_PACKING_STD430) { + array_stride = deref_array->type->std430_array_stride(array_row_major); + } else { + array_stride = deref_array->type->std140_size(array_row_major); + array_stride = glsl_align(array_stride, 16); + } + } + + ir_rvalue *array_index = deref_array->array_index; + if (array_index->type->base_type == GLSL_TYPE_INT) + array_index = i2u(array_index); + + ir_constant *const_index = + array_index->constant_expression_value(NULL); + if (const_index) { + *const_offset += array_stride * const_index->value.u[0]; + } else { + *offset = add(*offset, + mul(array_index, + new(mem_ctx) ir_constant(array_stride))); + } + deref = deref_array->array->as_dereference(); + break; + } + + case ir_type_dereference_record: { + ir_dereference_record *deref_record = (ir_dereference_record *) deref; + const glsl_type *struct_type = deref_record->record->type; + unsigned intra_struct_offset = 0; + + for (unsigned int i = 0; i < struct_type->length; i++) { + const glsl_type *type = struct_type->fields.structure[i].type; + + ir_dereference_record *field_deref = new(mem_ctx) + ir_dereference_record(deref_record->record, + struct_type->fields.structure[i].name); + const bool field_row_major = + is_dereferenced_thing_row_major(field_deref); + + ralloc_free(field_deref); + + unsigned field_align = 0; + + if (packing == GLSL_INTERFACE_PACKING_STD430) + field_align = type->std430_base_alignment(field_row_major); + else + field_align = type->std140_base_alignment(field_row_major); + + intra_struct_offset = glsl_align(intra_struct_offset, field_align); + + if (strcmp(struct_type->fields.structure[i].name, + deref_record->field) == 0) + break; + + if (packing == GLSL_INTERFACE_PACKING_STD430) + intra_struct_offset += type->std430_size(field_row_major); + else + intra_struct_offset
+= type->std140_size(field_row_major); + + /* If the field just examined was itself a structure, apply rule + * #9: + * + * "The structure may have padding at the end; the base offset + * of the member following the sub-structure is rounded up to + * the next multiple of the base alignment of the structure." + */ + if (type->without_array()->is_record()) { + intra_struct_offset = glsl_align(intra_struct_offset, + field_align); + + } + } + + *const_offset += intra_struct_offset; + deref = deref_record->record->as_dereference(); + break; + } + + case ir_type_swizzle: { + ir_swizzle *deref_swizzle = (ir_swizzle *) deref; + + assert(deref_swizzle->mask.num_components == 1); + + *const_offset += deref_swizzle->mask.x * sizeof(int); + deref = deref_swizzle->val->as_dereference(); + break; + } + + default: + assert(!"not reached"); + deref = NULL; + break; + } + } +} + +} /* namespace lower_buffer_access */ diff --git a/src/glsl/lower_buffer_access.h b/src/glsl/lower_buffer_access.h new file mode 100644 index 00000000000..cc4614e9792 --- /dev/null +++ b/src/glsl/lower_buffer_access.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_buffer_access.h + * + * Helper for IR lowering pass to replace dereferences of buffer object based + * shader variables with intrinsic function calls. + * + * This helper is used by lowering passes for UBOs, SSBOs and compute shader + * shared variables. 
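+ *
+ * Illustrative usage sketch (editorial addition, not part of this change;
+ * the class name is hypothetical): a pass derives from the helper, lets
+ * setup_buffer_access() compute byte offsets and emit_access() decompose
+ * aggregates into contiguous scalars and vectors, and only implements the
+ * pure virtual hook that emits the actual load/store intrinsic:
+ *
+ *    class lower_foo_reference_visitor
+ *       : public lower_buffer_access::lower_buffer_access {
+ *       void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
+ *                                 const glsl_type *type, ir_rvalue *offset,
+ *                                 unsigned mask, int channel)
+ *       {
+ *          // emit one load or store of a scalar/vector at 'offset'
+ *       }
+ *    };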
+ */ + +#pragma once +#ifndef LOWER_BUFFER_ACCESS_H +#define LOWER_BUFFER_ACCESS_H + +#include "ir.h" +#include "ir_rvalue_visitor.h" + +namespace lower_buffer_access { + +class lower_buffer_access : public ir_rvalue_enter_visitor { +public: + virtual void + insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel) = 0; + + void emit_access(void *mem_ctx, bool is_write, ir_dereference *deref, + ir_variable *base_offset, unsigned int deref_offset, + bool row_major, int matrix_columns, + unsigned int packing, unsigned int write_mask); + + bool is_dereferenced_thing_row_major(const ir_rvalue *deref); + + void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref, + ir_rvalue **offset, unsigned *const_offset, + bool *row_major, int *matrix_columns, + unsigned packing); +}; + +} /* namespace lower_buffer_access */ + +#endif /* LOWER_BUFFER_ACCESS_H */ diff --git a/src/glsl/lower_named_interface_blocks.cpp b/src/glsl/lower_named_interface_blocks.cpp index 114bb5811b4..f29eba4f75f 100644 --- a/src/glsl/lower_named_interface_blocks.cpp +++ b/src/glsl/lower_named_interface_blocks.cpp @@ -187,6 +187,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions) new_var->data.sample = iface_t->fields.structure[i].sample; new_var->data.patch = iface_t->fields.structure[i].patch; new_var->data.stream = var->data.stream; + new_var->data.how_declared = var->data.how_declared; new_var->init_interface_type(iface_t); hash_table_insert(interface_namespace, new_var, diff --git a/src/glsl/lower_packed_varyings.cpp b/src/glsl/lower_packed_varyings.cpp index 037c27d88ab..8d1eb1725d5 100644 --- a/src/glsl/lower_packed_varyings.cpp +++ b/src/glsl/lower_packed_varyings.cpp @@ -622,6 +622,7 @@ lower_packed_varyings_visitor::get_packed_varying_deref( packed_var->data.interpolation = unpacked_var->data.interpolation; packed_var->data.location = location; packed_var->data.precision = unpacked_var->data.precision; + packed_var->data.always_active_io = unpacked_var->data.always_active_io; unpacked_var->insert_before(packed_var); this->packed_varyings[slot] = packed_var; } else { diff --git a/src/glsl/lower_shared_reference.cpp b/src/glsl/lower_shared_reference.cpp new file mode 100644 index 00000000000..533cd9202f4 --- /dev/null +++ b/src/glsl/lower_shared_reference.cpp @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_shared_reference.cpp + * + * IR lower pass to replace dereferences of compute shader shared variables + * with intrinsic function calls. + * + * This relieves drivers of the responsibility of allocating space for the + * shared variables in the shared memory region. + */ + +#include "lower_buffer_access.h" +#include "ir_builder.h" +#include "main/macros.h" +#include "util/list.h" +#include "glsl_parser_extras.h" + +using namespace ir_builder; + +namespace { + +struct var_offset { + struct list_head node; + const ir_variable *var; + unsigned offset; +}; + +class lower_shared_reference_visitor : + public lower_buffer_access::lower_buffer_access { +public: + + lower_shared_reference_visitor(struct gl_shader *shader) + : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u) + { + list_inithead(&var_offsets); + } + + ~lower_shared_reference_visitor() + { + ralloc_free(list_ctx); + } + + enum { + shared_load_access, + shared_store_access, + shared_atomic_access, + } buffer_access_type; + + void insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel); + + void handle_rvalue(ir_rvalue **rvalue); + ir_visitor_status visit_enter(ir_assignment *ir); + void handle_assignment(ir_assignment *ir); + + ir_call *lower_shared_atomic_intrinsic(ir_call *ir); + ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); + ir_visitor_status visit_enter(ir_call *ir); + + unsigned get_shared_offset(const ir_variable *); + + ir_call *shared_load(void *mem_ctx, const struct glsl_type *type, + ir_rvalue *offset); + ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, + unsigned write_mask); + + void *list_ctx; + struct gl_shader *shader; + struct list_head var_offsets; + unsigned shared_size; + bool progress; +}; + +unsigned +lower_shared_reference_visitor::get_shared_offset(const ir_variable *var) +{ + list_for_each_entry(var_offset, var_entry, &var_offsets, node) { + if (var_entry->var == var) + return var_entry->offset; + } + + struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset); + list_add(&new_entry->node, &var_offsets); + new_entry->var = var; + + unsigned var_align = var->type->std430_base_alignment(false); + new_entry->offset = glsl_align(shared_size, var_align); + + unsigned var_size = var->type->std430_size(false); + shared_size = new_entry->offset + var_size; + + return new_entry->offset; +} + +void +lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + if (!deref) + return; + + ir_variable *var = deref->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return; + + buffer_access_type = shared_load_access; + + void *mem_ctx = ralloc_parent(shader->ir); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + /* Now that we've calculated the offset to the start of the + * dereference, 
walk over the type and emit loads into a temporary. + */ + const glsl_type *type = (*rvalue)->type; + ir_variable *load_var = new(mem_ctx) ir_variable(type, + "shared_load_temp", + ir_var_temporary); + base_ir->insert_before(load_var); + + ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "shared_load_temp_offset", + ir_var_temporary); + base_ir->insert_before(load_offset); + base_ir->insert_before(assign(load_offset, offset)); + + deref = new(mem_ctx) ir_dereference_variable(load_var); + + emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major, + matrix_columns, packing, 0); + + *rvalue = deref; + + progress = true; +} + +void +lower_shared_reference_visitor::handle_assignment(ir_assignment *ir) +{ + if (!ir || !ir->lhs) + return; + + ir_rvalue *rvalue = ir->lhs->as_rvalue(); + if (!rvalue) + return; + + ir_dereference *deref = ir->lhs->as_dereference(); + if (!deref) + return; + + ir_variable *var = ir->lhs->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return; + + buffer_access_type = shared_store_access; + + /* We have a write to a shared variable, so declare a temporary and rewrite + * the assignment so that the temporary is the LHS. + */ + void *mem_ctx = ralloc_parent(shader->ir); + + const glsl_type *type = rvalue->type; + ir_variable *store_var = new(mem_ctx) ir_variable(type, + "shared_store_temp", + ir_var_temporary); + base_ir->insert_before(store_var); + ir->lhs = new(mem_ctx) ir_dereference_variable(store_var); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + deref = new(mem_ctx) ir_dereference_variable(store_var); + + ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "shared_store_temp_offset", + ir_var_temporary); + base_ir->insert_before(store_offset); + base_ir->insert_before(assign(store_offset, offset)); + + /* Now we have to write the value assigned to the temporary back to memory */ + emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major, + matrix_columns, packing, ir->write_mask); + + progress = true; +} + +ir_visitor_status +lower_shared_reference_visitor::visit_enter(ir_assignment *ir) +{ + handle_assignment(ir); + return rvalue_visit(ir); +} + +void +lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx, + ir_dereference *deref, + const glsl_type *type, + ir_rvalue *offset, + unsigned mask, + int channel) +{ + if (buffer_access_type == shared_store_access) { + ir_call *store = shared_store(mem_ctx, deref, offset, mask); + base_ir->insert_after(store); + } else { + ir_call *load = shared_load(mem_ctx, type, offset); + base_ir->insert_before(load); + ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL); + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), + value)); + } +} + +static bool +compute_shader_enabled(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_COMPUTE; +} + +ir_call * +lower_shared_reference_visitor::shared_store(void *mem_ctx, + ir_rvalue *deref, + ir_rvalue *offset, + unsigned write_mask) +{ + exec_list sig_params; + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_variable 
*val_ref = new(mem_ctx) + ir_variable(deref->type, "value", ir_var_function_in); + sig_params.push_tail(val_ref); + + ir_variable *writemask_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "write_mask", ir_var_function_in); + sig_params.push_tail(writemask_ref); + + ir_function_signature *sig = new(mem_ctx) + ir_function_signature(glsl_type::void_type, compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared"); + f->add_signature(sig); + + exec_list call_params; + call_params.push_tail(offset->clone(mem_ctx, NULL)); + call_params.push_tail(deref->clone(mem_ctx, NULL)); + call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); + return new(mem_ctx) ir_call(sig, NULL, &call_params); +} + +ir_call * +lower_shared_reference_visitor::shared_load(void *mem_ctx, + const struct glsl_type *type, + ir_rvalue *offset) +{ + exec_list sig_params; + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset_ref", ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(type, compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared"); + f->add_signature(sig); + + ir_variable *result = new(mem_ctx) + ir_variable(type, "shared_load_result", ir_var_temporary); + base_ir->insert_before(result); + ir_dereference_variable *deref_result = new(mem_ctx) + ir_dereference_variable(result); + + exec_list call_params; + call_params.push_tail(offset->clone(mem_ctx, NULL)); + + return new(mem_ctx) ir_call(sig, deref_result, &call_params); +} + +/* Lowers the intrinsic call to a new internal intrinsic that replaces the + * access to the shared variable in the first parameter with an offset. This + * involves creating the new internal intrinsic (i.e. the new function + * signature). + */ +ir_call * +lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) +{ + /* Shared atomics usually have 2 parameters, the shared variable and an + * integer argument. The exception is CompSwap, which takes an additional + * integer parameter.
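+ *
+ * Editorial illustration (not part of this change): for
+ *
+ *    shared uint counter;
+ *    ...
+ *    uint old = atomicAdd(counter, 1u);
+ *
+ * the builtin call reaches this pass as __intrinsic_atomic_add(counter,
+ * 1u); lower_shared_atomic_intrinsic() rewrites it as
+ * __intrinsic_atomic_add_shared(offset, 1u), where offset is counter's
+ * byte offset within the shared region, so later stages only ever see
+ * offsets, never the shared variable itself.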
+ */ + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* First argument must be a scalar integer shared variable */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + assert(inst->ir_type == ir_type_dereference_variable || + inst->ir_type == ir_type_dereference_array || + inst->ir_type == ir_type_dereference_record || + inst->ir_type == ir_type_swizzle); + + ir_rvalue *deref = (ir_rvalue *) inst; + assert(deref->type->is_scalar() && deref->type->is_integer()); + + ir_variable *var = deref->variable_referenced(); + assert(var); + + /* Compute the offset to the start of the dereference + */ + void *mem_ctx = ralloc_parent(shader->ir); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + buffer_access_type = shared_atomic_access; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + assert(offset); + assert(!row_major); + assert(matrix_columns == 1); + + ir_rvalue *deref_offset = + add(offset, new(mem_ctx) ir_constant(const_offset)); + + /* Create the new internal function signature that will take an offset + * instead of a shared variable + */ + exec_list sig_params; + ir_variable *sig_param = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset", ir_var_function_in); + sig_params.push_tail(sig_param); + + const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? + glsl_type::int_type : glsl_type::uint_type; + sig_param = new(mem_ctx) + ir_variable(type, "data1", ir_var_function_in); + sig_params.push_tail(sig_param); + + if (param_count == 3) { + sig_param = new(mem_ctx) + ir_variable(type, "data2", ir_var_function_in); + sig_params.push_tail(sig_param); + } + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(deref->type, + compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + char func_name[64]; + sprintf(func_name, "%s_shared", ir->callee_name()); + ir_function *f = new(mem_ctx) ir_function(func_name); + f->add_signature(sig); + + /* Now, create the call to the internal intrinsic */ + exec_list call_params; + call_params.push_tail(deref_offset); + param = ir->actual_parameters.get_head()->get_next(); + ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); + call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); + if (param_count == 3) { + param = param->get_next(); + param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); + call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); + } + ir_dereference_variable *return_deref = + ir->return_deref->clone(mem_ctx, NULL); + return new(mem_ctx) ir_call(sig, return_deref, &call_params); +} + +ir_call * +lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir) +{ + exec_list& params = ir->actual_parameters; + + if (params.length() < 2 || params.length() > 3) + return ir; + + ir_rvalue *rvalue = + ((ir_instruction *) params.get_head())->as_rvalue(); + if (!rvalue) + return ir; + + ir_variable *var = rvalue->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return ir; + + const char *callee = ir->callee_name(); + if (!strcmp("__intrinsic_atomic_add", callee) || + !strcmp("__intrinsic_atomic_min", callee) || + !strcmp("__intrinsic_atomic_max",
callee) || + !strcmp("__intrinsic_atomic_and", callee) || + !strcmp("__intrinsic_atomic_or", callee) || + !strcmp("__intrinsic_atomic_xor", callee) || + !strcmp("__intrinsic_atomic_exchange", callee) || + !strcmp("__intrinsic_atomic_comp_swap", callee)) { + return lower_shared_atomic_intrinsic(ir); + } + + return ir; +} + +ir_visitor_status +lower_shared_reference_visitor::visit_enter(ir_call *ir) +{ + ir_call *new_ir = check_for_shared_atomic_intrinsic(ir); + if (new_ir != ir) { + progress = true; + base_ir->replace_with(new_ir); + return visit_continue_with_parent; + } + + return rvalue_visit(ir); +} + +} /* unnamed namespace */ + +void +lower_shared_reference(struct gl_shader *shader, unsigned *shared_size) +{ + if (shader->Stage != MESA_SHADER_COMPUTE) + return; + + lower_shared_reference_visitor v(shader); + + /* Loop over the instructions lowering references, because taking a deref + * of a shared variable array using a shared variable dereference as the + * index will produce a collection of instructions all of which have cloned + * shared variable dereferences for that array index. + */ + do { + v.progress = false; + visit_list_elements(&v, shader->ir); + } while (v.progress); + + *shared_size = v.shared_size; +} diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index b74aa3d0630..a172054bac8 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -33,106 +33,16 @@ * their own. */ -#include "ir.h" +#include "lower_buffer_access.h" #include "ir_builder.h" -#include "ir_rvalue_visitor.h" #include "main/macros.h" #include "glsl_parser_extras.h" using namespace ir_builder; -/** - * Determine if a thing being dereferenced is row-major - * - * There is some trickery here. - * - * If the thing being dereferenced is a member of uniform block \b without an - * instance name, then the name of the \c ir_variable is the field name of an - * interface type. If this field is row-major, then the thing referenced is - * row-major. - * - * If the thing being dereferenced is a member of uniform block \b with an - * instance name, then the last dereference in the tree will be an - * \c ir_dereference_record. If that record field is row-major, then the - * thing referenced is row-major.
- */ -static bool -is_dereferenced_thing_row_major(const ir_rvalue *deref) -{ - bool matrix = false; - const ir_rvalue *ir = deref; - - while (true) { - matrix = matrix || ir->type->without_array()->is_matrix(); - - switch (ir->ir_type) { - case ir_type_dereference_array: { - const ir_dereference_array *const array_deref = - (const ir_dereference_array *) ir; - - ir = array_deref->array; - break; - } - - case ir_type_dereference_record: { - const ir_dereference_record *const record_deref = - (const ir_dereference_record *) ir; - - ir = record_deref->record; - - const int idx = ir->type->field_index(record_deref->field); - assert(idx >= 0); - - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout); - - switch (matrix_layout) { - case GLSL_MATRIX_LAYOUT_INHERITED: - break; - case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: - return false; - case GLSL_MATRIX_LAYOUT_ROW_MAJOR: - return matrix || deref->type->without_array()->is_record(); - } - - break; - } - - case ir_type_dereference_variable: { - const ir_dereference_variable *const var_deref = - (const ir_dereference_variable *) ir; - - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(var_deref->var->data.matrix_layout); - - switch (matrix_layout) { - case GLSL_MATRIX_LAYOUT_INHERITED: - assert(!matrix); - return false; - case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: - return false; - case GLSL_MATRIX_LAYOUT_ROW_MAJOR: - return matrix || deref->type->without_array()->is_record(); - } - - unreachable("invalid matrix layout"); - break; - } - - default: - return false; - } - } - - /* The tree must have ended with a dereference that wasn't an - * ir_dereference_variable. That is invalid, and it should be impossible. - */ - unreachable("invalid dereference tree"); - return false; -} - namespace { -class lower_ubo_reference_visitor : public ir_rvalue_enter_visitor { +class lower_ubo_reference_visitor : + public lower_buffer_access::lower_buffer_access { public: lower_ubo_reference_visitor(struct gl_shader *shader) : shader(shader) @@ -142,30 +52,38 @@ public: void handle_rvalue(ir_rvalue **rvalue); ir_visitor_status visit_enter(ir_assignment *ir); - void setup_for_load_or_store(ir_variable *var, + void setup_for_load_or_store(void *mem_ctx, + ir_variable *var, ir_rvalue *deref, ir_rvalue **offset, unsigned *const_offset, bool *row_major, int *matrix_columns, unsigned packing); - ir_expression *ubo_load(const struct glsl_type *type, + ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, ir_rvalue *offset); - ir_call *ssbo_load(const struct glsl_type *type, + ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, ir_rvalue *offset); + bool check_for_buffer_array_copy(ir_assignment *ir); + bool check_for_buffer_struct_copy(ir_assignment *ir); void check_for_ssbo_store(ir_assignment *ir); - void write_to_memory(ir_dereference *deref, - ir_variable *var, - ir_variable *write_var, - unsigned write_mask); - ir_call *ssbo_store(ir_rvalue *deref, ir_rvalue *offset, + void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, + ir_variable *write_var, unsigned write_mask); + ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, unsigned write_mask); - void emit_access(bool is_write, ir_dereference *deref, - ir_variable *base_offset, unsigned int deref_offset, - bool row_major, int matrix_columns, - unsigned packing, unsigned write_mask); + enum { + ubo_load_access, + ssbo_load_access, + ssbo_store_access, + 
ssbo_unsized_array_length_access, + ssbo_atomic_access, + } buffer_access_type; + + void insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel); ir_visitor_status visit_enter(class ir_expression *); ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr); @@ -175,7 +93,7 @@ public: ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, ir_dereference *, ir_variable *); - ir_expression *emit_ssbo_get_buffer_size(); + ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); unsigned calculate_unsized_array_stride(ir_dereference *deref, unsigned packing); @@ -184,12 +102,10 @@ public: ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); ir_visitor_status visit_enter(ir_call *ir); - void *mem_ctx; struct gl_shader *shader; struct gl_uniform_buffer_variable *ubo_var; ir_rvalue *uniform_block; bool progress; - bool is_shader_storage; }; /** @@ -324,7 +240,8 @@ interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, } void -lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, +lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, + ir_variable *var, ir_rvalue *deref, ir_rvalue **offset, unsigned *const_offset, @@ -339,10 +256,9 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, deref, &nonconst_block_index); /* Locate the block by interface name */ - this->is_shader_storage = var->is_in_shader_storage_block(); unsigned num_blocks; struct gl_uniform_block **blocks; - if (this->is_shader_storage) { + if (this->buffer_access_type != ubo_load_access) { num_blocks = shader->NumShaderStorageBlocks; blocks = shader->ShaderStorageBlocks; } else { @@ -370,164 +286,10 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, assert(this->uniform_block); - *offset = new(mem_ctx) ir_constant(0u); - *const_offset = 0; - *row_major = is_dereferenced_thing_row_major(deref); - *matrix_columns = 1; + *const_offset = ubo_var->Offset; - /* Calculate the offset to the start of the region of the UBO - * dereferenced by *rvalue. This may be a variable offset if an - * array dereference has a variable index. - */ - while (deref) { - switch (deref->ir_type) { - case ir_type_dereference_variable: { - *const_offset += ubo_var->Offset; - deref = NULL; - break; - } - - case ir_type_dereference_array: { - ir_dereference_array *deref_array = (ir_dereference_array *) deref; - unsigned array_stride; - if (deref_array->array->type->is_vector()) { - /* We get this when storing or loading a component out of a vector - * with a non-constant index. This happens for v[i] = f where v is - * a vector (or m[i][j] = f where m is a matrix). If we don't - * lower that here, it gets turned into v = vector_insert(v, i, - * f), which loads the entire vector, modifies one component and - * then write the entire thing back. That breaks if another - * thread or SIMD channel is modifying the same vector. - */ - array_stride = 4; - if (deref_array->array->type->is_double()) - array_stride *= 2; - } else if (deref_array->array->type->is_matrix() && *row_major) { - /* When loading a vector out of a row major matrix, the - * step between the columns (vectors) is the size of a - * float, while the step between the rows (elements of a - * vector) is handled below in emit_ubo_loads. 
- */ - array_stride = 4; - if (deref_array->array->type->is_double()) - array_stride *= 2; - *matrix_columns = deref_array->array->type->matrix_columns; - } else if (deref_array->type->without_array()->is_interface()) { - /* We're processing an array dereference of an interface instance - * array. The thing being dereferenced *must* be a variable - * dereference because interfaces cannot be embedded in other - * types. In terms of calculating the offsets for the lowering - * pass, we don't care about the array index. All elements of an - * interface instance array will have the same offsets relative to - * the base of the block that backs them. - */ - deref = deref_array->array->as_dereference(); - break; - } else { - /* Whether or not the field is row-major (because it might be a - * bvec2 or something) does not affect the array itself. We need - * to know whether an array element in its entirety is row-major. - */ - const bool array_row_major = - is_dereferenced_thing_row_major(deref_array); - - /* The array type will give the correct interface packing - * information - */ - if (packing == GLSL_INTERFACE_PACKING_STD430) { - array_stride = deref_array->type->std430_array_stride(array_row_major); - } else { - array_stride = deref_array->type->std140_size(array_row_major); - array_stride = glsl_align(array_stride, 16); - } - } - - ir_rvalue *array_index = deref_array->array_index; - if (array_index->type->base_type == GLSL_TYPE_INT) - array_index = i2u(array_index); - - ir_constant *const_index = - array_index->constant_expression_value(NULL); - if (const_index) { - *const_offset += array_stride * const_index->value.u[0]; - } else { - *offset = add(*offset, - mul(array_index, - new(mem_ctx) ir_constant(array_stride))); - } - deref = deref_array->array->as_dereference(); - break; - } - - case ir_type_dereference_record: { - ir_dereference_record *deref_record = (ir_dereference_record *) deref; - const glsl_type *struct_type = deref_record->record->type; - unsigned intra_struct_offset = 0; - - for (unsigned int i = 0; i < struct_type->length; i++) { - const glsl_type *type = struct_type->fields.structure[i].type; - - ir_dereference_record *field_deref = new(mem_ctx) - ir_dereference_record(deref_record->record, - struct_type->fields.structure[i].name); - const bool field_row_major = - is_dereferenced_thing_row_major(field_deref); - - ralloc_free(field_deref); - - unsigned field_align = 0; - - if (packing == GLSL_INTERFACE_PACKING_STD430) - field_align = type->std430_base_alignment(field_row_major); - else - field_align = type->std140_base_alignment(field_row_major); - - intra_struct_offset = glsl_align(intra_struct_offset, field_align); - - if (strcmp(struct_type->fields.structure[i].name, - deref_record->field) == 0) - break; - - if (packing == GLSL_INTERFACE_PACKING_STD430) - intra_struct_offset += type->std430_size(field_row_major); - else - intra_struct_offset += type->std140_size(field_row_major); - - /* If the field just examined was itself a structure, apply rule - * #9: - * - * "The structure may have padding at the end; the base offset - * of the member following the sub-structure is rounded up to - * the next multiple of the base alignment of the structure." 
- */ - if (type->without_array()->is_record()) { - intra_struct_offset = glsl_align(intra_struct_offset, - field_align); - - } - } - - *const_offset += intra_struct_offset; - deref = deref_record->record->as_dereference(); - break; - } - - case ir_type_swizzle: { - ir_swizzle *deref_swizzle = (ir_swizzle *) deref; - - assert(deref_swizzle->mask.num_components == 1); - - *const_offset += deref_swizzle->mask.x * sizeof(int); - deref = deref_swizzle->val->as_dereference(); - break; - } - - default: - assert(!"not reached"); - deref = NULL; - break; - } - } + setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major, + matrix_columns, packing); } void @@ -544,7 +306,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) if (!var || !var->is_in_buffer_block()) return; - mem_ctx = ralloc_parent(shader->ir); + void *mem_ctx = ralloc_parent(shader->ir); ir_rvalue *offset = NULL; unsigned const_offset; @@ -552,10 +314,14 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) int matrix_columns; unsigned packing = var->get_interface_type()->interface_packing; + this->buffer_access_type = + var->is_in_shader_storage_block() ? + ssbo_load_access : ubo_load_access; + /* Compute the offset to the start if the dereference as well as other * information we need to configure the write */ - setup_for_load_or_store(var, deref, + setup_for_load_or_store(mem_ctx, var, deref, &offset, &const_offset, &row_major, &matrix_columns, packing); @@ -577,7 +343,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) base_ir->insert_before(assign(load_offset, offset)); deref = new(mem_ctx) ir_dereference_variable(load_var); - emit_access(false, deref, load_offset, const_offset, + emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major, matrix_columns, packing, 0); *rvalue = deref; @@ -585,7 +351,8 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) } ir_expression * -lower_ubo_reference_visitor::ubo_load(const glsl_type *type, +lower_ubo_reference_visitor::ubo_load(void *mem_ctx, + const glsl_type *type, ir_rvalue *offset) { ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); @@ -604,7 +371,8 @@ shader_storage_buffer_object(const _mesa_glsl_parse_state *state) } ir_call * -lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref, +lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, + ir_rvalue *deref, ir_rvalue *offset, unsigned write_mask) { @@ -644,7 +412,8 @@ lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref, } ir_call * -lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type, +lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, + const struct glsl_type *type, ir_rvalue *offset) { exec_list sig_params; @@ -679,208 +448,46 @@ lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type, return new(mem_ctx) ir_call(sig, deref_result, &call_params); } -static inline int -writemask_for_size(unsigned n) -{ - return ((1 << n) - 1); -} - -/** - * Takes a deref and recursively calls itself to break the deref down to the - * point that the reads or writes generated are contiguous scalars or vectors. 
- */ void -lower_ubo_reference_visitor::emit_access(bool is_write, - ir_dereference *deref, - ir_variable *base_offset, - unsigned int deref_offset, - bool row_major, - int matrix_columns, - unsigned packing, - unsigned write_mask) +lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx, + ir_dereference *deref, + const glsl_type *type, + ir_rvalue *offset, + unsigned mask, + int channel) { - if (deref->type->is_record()) { - unsigned int field_offset = 0; - - for (unsigned i = 0; i < deref->type->length; i++) { - const struct glsl_struct_field *field = - &deref->type->fields.structure[i]; - ir_dereference *field_deref = - new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), - field->name); - - field_offset = - glsl_align(field_offset, - field->type->std140_base_alignment(row_major)); - - emit_access(is_write, field_deref, base_offset, - deref_offset + field_offset, - row_major, 1, packing, - writemask_for_size(field_deref->type->vector_elements)); - - field_offset += field->type->std140_size(row_major); - } - return; - } - - if (deref->type->is_array()) { - unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ? - deref->type->fields.array->std430_array_stride(row_major) : - glsl_align(deref->type->fields.array->std140_size(row_major), 16); - - for (unsigned i = 0; i < deref->type->length; i++) { - ir_constant *element = new(mem_ctx) ir_constant(i); - ir_dereference *element_deref = - new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), - element); - emit_access(is_write, element_deref, base_offset, - deref_offset + i * array_stride, - row_major, 1, packing, - writemask_for_size(element_deref->type->vector_elements)); - } - return; - } - - if (deref->type->is_matrix()) { - for (unsigned i = 0; i < deref->type->matrix_columns; i++) { - ir_constant *col = new(mem_ctx) ir_constant(i); - ir_dereference *col_deref = - new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col); - - if (row_major) { - /* For a row-major matrix, the next column starts at the next - * element. - */ - int size_mul = deref->type->is_double() ? 8 : 4; - emit_access(is_write, col_deref, base_offset, - deref_offset + i * size_mul, - row_major, deref->type->matrix_columns, packing, - writemask_for_size(col_deref->type->vector_elements)); - } else { - int size_mul; - - /* std430 doesn't round up vec2 size to a vec4 size */ - if (packing == GLSL_INTERFACE_PACKING_STD430 && - deref->type->vector_elements == 2 && - !deref->type->is_double()) { - size_mul = 8; - } else { - /* std140 always rounds the stride of arrays (and matrices) to a - * vec4, so matrices are always 16 between columns/rows. With - * doubles, they will be 32 apart when there are more than 2 rows. - * - * For both std140 and std430, if the member is a - * three-'component vector with components consuming N basic - * machine units, the base alignment is 4N. For vec4, base - * alignment is 4N. - */ - size_mul = (deref->type->is_double() && - deref->type->vector_elements > 2) ? 
32 : 16; - } - - emit_access(is_write, col_deref, base_offset, - deref_offset + i * size_mul, - row_major, deref->type->matrix_columns, packing, - writemask_for_size(col_deref->type->vector_elements)); - } - } - return; + switch (this->buffer_access_type) { + case ubo_load_access: + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), + ubo_load(mem_ctx, type, offset), + mask)); + break; + case ssbo_load_access: { + ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset); + base_ir->insert_before(load_ssbo); + ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); + ir_assignment *assignment = + assign(deref->clone(mem_ctx, NULL), value, mask); + base_ir->insert_before(assignment); + break; } - - assert(deref->type->is_scalar() || deref->type->is_vector()); - - if (!row_major) { - ir_rvalue *offset = - add(base_offset, new(mem_ctx) ir_constant(deref_offset)); - if (is_write) - base_ir->insert_after(ssbo_store(deref, offset, write_mask)); - else { - if (!this->is_shader_storage) { - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - ubo_load(deref->type, offset))); - } else { - ir_call *load_ssbo = ssbo_load(deref->type, offset); - base_ir->insert_before(load_ssbo); - ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), value)); - } - } - } else { - unsigned N = deref->type->is_double() ? 8 : 4; - - /* We're dereffing a column out of a row-major matrix, so we - * gather the vector from each stored row. - */ - assert(deref->type->base_type == GLSL_TYPE_FLOAT || - deref->type->base_type == GLSL_TYPE_DOUBLE); - /* Matrices, row_major or not, are stored as if they were - * arrays of vectors of the appropriate size in std140. - * Arrays have their strides rounded up to a vec4, so the - * matrix stride is always 16. However a double matrix may either be 16 - * or 32 depending on the number of columns. - */ - assert(matrix_columns <= 4); - unsigned matrix_stride = 0; - /* Matrix stride for std430 mat2xY matrices are not rounded up to - * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform - * Block Layout": - * - * "2. If the member is a two- or four-component vector with components - * consuming N basic machine units, the base alignment is 2N or 4N, - * respectively." [...] - * "4. If the member is an array of scalars or vectors, the base alignment - * and array stride are set to match the base alignment of a single array - * element, according to rules (1), (2), and (3), and rounded up to the - * base alignment of a vec4." [...] - * "7. If the member is a row-major matrix with C columns and R rows, the - * matrix is stored identically to an array of R row vectors with C - * components each, according to rule (4)." [...] - * "When using the std430 storage layout, shader storage blocks will be - * laid out in buffer storage identically to uniform and shader storage - * blocks using the std140 layout, except that the base alignment and - * stride of arrays of scalars and vectors in rule 4 and of structures in - * rule 9 are not rounded up a multiple of the base alignment of a vec4." - */ - if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2) - matrix_stride = 2 * N; - else - matrix_stride = glsl_align(matrix_columns * N, 16); - - const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ? 
- glsl_type::float_type : glsl_type::double_type; - - for (unsigned i = 0; i < deref->type->vector_elements; i++) { - ir_rvalue *chan_offset = - add(base_offset, - new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); - if (is_write) { - /* If the component is not in the writemask, then don't - * store any value. - */ - if (!((1 << i) & write_mask)) - continue; - - base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1)); - } else { - if (!this->is_shader_storage) { - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - ubo_load(deref_type, chan_offset), - (1U << i))); - } else { - ir_call *load_ssbo = ssbo_load(deref_type, chan_offset); - base_ir->insert_before(load_ssbo); - ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - value, - (1U << i))); - } - } + case ssbo_store_access: + if (channel >= 0) { + base_ir->insert_after(ssbo_store(mem_ctx, + swizzle(deref, channel, 1), + offset, 1)); + } else { + base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask)); } + break; + default: + unreachable("invalid buffer_access_type in insert_buffer_access"); } } void -lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref, +lower_ubo_reference_visitor::write_to_memory(void *mem_ctx, + ir_dereference *deref, ir_variable *var, ir_variable *write_var, unsigned write_mask) @@ -891,10 +498,12 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref, int matrix_columns; unsigned packing = var->get_interface_type()->interface_packing; + this->buffer_access_type = ssbo_store_access; + /* Compute the offset to the start if the dereference as well as other * information we need to configure the write */ - setup_for_load_or_store(var, deref, + setup_for_load_or_store(mem_ctx, var, deref, &offset, &const_offset, &row_major, &matrix_columns, packing); @@ -910,7 +519,7 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref, base_ir->insert_before(assign(write_offset, offset)); deref = new(mem_ctx) ir_dereference_variable(write_var); - emit_access(true, deref, write_offset, const_offset, + emit_access(mem_ctx, true, deref, write_offset, const_offset, row_major, matrix_columns, packing, write_mask); } @@ -985,7 +594,7 @@ lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assig } ir_expression * -lower_ubo_reference_visitor::emit_ssbo_get_buffer_size() +lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx) { ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); return new(mem_ctx) ir_expression(ir_unop_get_buffer_size, @@ -1059,7 +668,7 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu ir_dereference *deref, ir_variable *var) { - mem_ctx = ralloc_parent(*rvalue); + void *mem_ctx = ralloc_parent(*rvalue); ir_rvalue *base_offset = NULL; unsigned const_offset; @@ -1068,17 +677,19 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu unsigned packing = var->get_interface_type()->interface_packing; int unsized_array_stride = calculate_unsized_array_stride(deref, packing); + this->buffer_access_type = ssbo_unsized_array_length_access; + /* Compute the offset to the start if the dereference as well as other * information we need to calculate the length. 
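 *
 * (Editorial worked example with made-up numbers, not in the original
 * source: for
 *
 *    layout(std430) buffer B { uint count; float data[]; };
 *
 * bound to a 4096-byte buffer object, offset_of_array is 4 and the
 * unsized array stride is 4, so data.length() evaluates
 * max((4096 - 4) / 4, 0) = 1023 at run time, via the
 * ir_unop_get_buffer_size expression emitted below.)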
*/ - setup_for_load_or_store(var, deref, + setup_for_load_or_store(mem_ctx, var, deref, &base_offset, &const_offset, &row_major, &matrix_columns, packing); /* array.length() = * max((buffer_object_size - offset_of_array) / stride_of_array, 0) */ - ir_expression *buffer_size = emit_ssbo_get_buffer_size(); + ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx); ir_expression *offset_of_array = new(mem_ctx) ir_expression(ir_binop_add, base_offset, @@ -1112,13 +723,13 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) return; ir_variable *var = ir->lhs->variable_referenced(); - if (!var || !var->is_in_buffer_block()) + if (!var || !var->is_in_shader_storage_block()) return; /* We have a write to a buffer variable, so declare a temporary and rewrite * the assignment so that the temporary is the LHS. */ - mem_ctx = ralloc_parent(shader->ir); + void *mem_ctx = ralloc_parent(shader->ir); const glsl_type *type = rvalue->type; ir_variable *write_var = new(mem_ctx) ir_variable(type, @@ -1128,14 +739,131 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) ir->lhs = new(mem_ctx) ir_dereference_variable(write_var); /* Now we have to write the value assigned to the temporary back to memory */ - write_to_memory(deref, var, write_var, ir->write_mask); + write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask); progress = true; } +static bool +is_buffer_backed_variable(ir_variable *var) +{ + return var->is_in_buffer_block() || + var->data.mode == ir_var_shader_shared; +} + +bool +lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir) +{ + if (!ir || !ir->lhs || !ir->rhs) + return false; + + /* LHS and RHS must be arrays + * FIXME: arrays of arrays? + */ + if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array()) + return false; + + /* RHS must be a buffer-backed variable. This is what can cause the problem + * since it would lead to a series of loads that need to live until we + * see the writes to the LHS. + */ + ir_variable *rhs_var = ir->rhs->variable_referenced(); + if (!rhs_var || !is_buffer_backed_variable(rhs_var)) + return false; + + /* Split the array copy into individual element copies to reduce + * register pressure + */ + ir_dereference *rhs_deref = ir->rhs->as_dereference(); + if (!rhs_deref) + return false; + + ir_dereference *lhs_deref = ir->lhs->as_dereference(); + if (!lhs_deref) + return false; + + assert(lhs_deref->type->length == rhs_deref->type->length); + void *mem_ctx = ralloc_parent(shader->ir); + + for (unsigned i = 0; i < lhs_deref->type->length; i++) { + ir_dereference *lhs_i = + new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + + ir_dereference *rhs_i = + new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + ir->insert_after(assign(lhs_i, rhs_i)); + } + + ir->remove(); + progress = true; + return true; +} + +bool +lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir) +{ + if (!ir || !ir->lhs || !ir->rhs) + return false; + + /* LHS and RHS must be records */ + if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record()) + return false; + + /* RHS must be a buffer-backed variable. This is what can cause the problem + * since it would lead to a series of loads that need to live until we + * see the writes to the LHS. 
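+ *
+ * Editorial example (not part of this change): for
+ *
+ *    buffer B { vec4 a[64]; vec4 b[64]; } ssbo;
+ *    ...
+ *    ssbo.a = ssbo.b;
+ *
+ * the loop below emits ssbo.a[0] = ssbo.b[0]; ... ssbo.a[63] = ssbo.b[63];
+ * so each element's load can be stored back before the next element is
+ * loaded, instead of keeping all 64 vec4 loads live at once.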
+ */ + ir_variable *rhs_var = ir->rhs->variable_referenced(); + if (!rhs_var || !is_buffer_backed_variable(rhs_var)) + return false; + + /* Split the struct copy into individual element copies to reduce + * register pressure + */ + ir_dereference *rhs_deref = ir->rhs->as_dereference(); + if (!rhs_deref) + return false; + + ir_dereference *lhs_deref = ir->lhs->as_dereference(); + if (!lhs_deref) + return false; + + assert(lhs_deref->type->record_compare(rhs_deref->type)); + void *mem_ctx = ralloc_parent(shader->ir); + + for (unsigned i = 0; i < lhs_deref->type->length; i++) { + const char *field_name = lhs_deref->type->fields.structure[i].name; + ir_dereference *lhs_field = + new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL), + field_name); + ir_dereference *rhs_field = + new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL), + field_name); + ir->insert_after(assign(lhs_field, rhs_field)); + } + + ir->remove(); + progress = true; + return true; +} ir_visitor_status lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) { + /* Array and struct copies could involve large amounts of load/store + * operations. To improve register pressure we want to special-case + * these and split them into individual element copies. + * This way we avoid emitting all the loads for the RHS first and + * all the writes for the LHS second and register usage is more + * efficient. + */ + if (check_for_buffer_array_copy(ir)) + return visit_continue_with_parent; + + if (check_for_buffer_struct_copy(ir)) + return visit_continue_with_parent; + check_ssbo_unsized_array_length_assignment(ir); check_for_ssbo_store(ir); return rvalue_visit(ir); @@ -1173,7 +901,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) /* Compute the offset to the start if the dereference and the * block index */ - mem_ctx = ralloc_parent(shader->ir); + void *mem_ctx = ralloc_parent(shader->ir); ir_rvalue *offset = NULL; unsigned const_offset; @@ -1181,7 +909,9 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) int matrix_columns; unsigned packing = var->get_interface_type()->interface_packing; - setup_for_load_or_store(var, deref, + this->buffer_access_type = ssbo_atomic_access; + + setup_for_load_or_store(mem_ctx, var, deref, &offset, &const_offset, &row_major, &matrix_columns, packing); @@ -1225,7 +955,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) sig->is_intrinsic = true; char func_name[64]; - sprintf(func_name, "%s_internal", ir->callee_name()); + sprintf(func_name, "%s_ssbo", ir->callee_name()); ir_function *f = new(mem_ctx) ir_function(func_name); f->add_signature(sig); @@ -1249,15 +979,29 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) ir_call * lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir) { + exec_list& params = ir->actual_parameters; + + if (params.length() < 2 || params.length() > 3) + return ir; + + ir_rvalue *rvalue = + ((ir_instruction *) params.get_head())->as_rvalue(); + if (!rvalue) + return ir; + + ir_variable *var = rvalue->variable_referenced(); + if (!var || !var->is_in_shader_storage_block()) + return ir; + const char *callee = ir->callee_name(); - if (!strcmp("__intrinsic_ssbo_atomic_add", callee) || - !strcmp("__intrinsic_ssbo_atomic_min", callee) || - !strcmp("__intrinsic_ssbo_atomic_max", callee) || - !strcmp("__intrinsic_ssbo_atomic_and", callee) || - !strcmp("__intrinsic_ssbo_atomic_or", callee) || - !strcmp("__intrinsic_ssbo_atomic_xor", callee) || - 
!strcmp("__intrinsic_ssbo_atomic_exchange", callee) || - !strcmp("__intrinsic_ssbo_atomic_comp_swap", callee)) { + if (!strcmp("__intrinsic_atomic_add", callee) || + !strcmp("__intrinsic_atomic_min", callee) || + !strcmp("__intrinsic_atomic_max", callee) || + !strcmp("__intrinsic_atomic_and", callee) || + !strcmp("__intrinsic_atomic_or", callee) || + !strcmp("__intrinsic_atomic_xor", callee) || + !strcmp("__intrinsic_atomic_exchange", callee) || + !strcmp("__intrinsic_atomic_comp_swap", callee)) { return lower_ssbo_atomic_intrinsic(ir); } diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index 1ab3afecc7e..a1ba9345e32 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -378,6 +378,9 @@ public: case ir_var_shader_storage: return this->lower_uniforms; + case ir_var_shader_shared: + return false; + case ir_var_function_in: case ir_var_const_in: return this->lower_temps; diff --git a/src/glsl/nir/builtin_type_macros.h b/src/glsl/nir/builtin_type_macros.h index 8e16ae45489..7bd2e4e6558 100644 --- a/src/glsl/nir/builtin_type_macros.h +++ b/src/glsl/nir/builtin_type_macros.h @@ -28,8 +28,6 @@ * language version or extension might provide them. */ -#include "glsl_types.h" - DECL_TYPE(error, GL_INVALID_ENUM, GLSL_TYPE_ERROR, 0, 0) DECL_TYPE(void, GL_INVALID_ENUM, GLSL_TYPE_VOID, 0, 0) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index a26300d1d26..9a25f2fc905 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -691,15 +691,15 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_store_ssbo; } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) { op = nir_intrinsic_load_ssbo; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_add_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_add; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_and_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_and; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_or_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_or; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_xor; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_ssbo") == 0) { assert(ir->return_deref); if (ir->return_deref->type == glsl_type::int_type) op = nir_intrinsic_ssbo_atomic_imin; @@ -707,7 +707,7 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_ssbo_atomic_umin; else unreachable("Invalid type"); - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_ssbo") == 0) { assert(ir->return_deref); if (ir->return_deref->type == glsl_type::int_type) op = nir_intrinsic_ssbo_atomic_imax; @@ -715,9 +715,9 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_ssbo_atomic_umax; else unreachable("Invalid type"); - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) { + } else if (strcmp(ir->callee_name(), 
"__intrinsic_atomic_exchange_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_exchange; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_comp_swap; } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) { op = nir_intrinsic_shader_clock; @@ -731,6 +731,38 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_memory_barrier_image; } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) { op = nir_intrinsic_memory_barrier_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) { + op = nir_intrinsic_load_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) { + op = nir_intrinsic_store_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) { + op = nir_intrinsic_shared_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) { + op = nir_intrinsic_shared_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) { + op = nir_intrinsic_shared_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) { + op = nir_intrinsic_shared_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_shared_atomic_imin; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_shared_atomic_umin; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_shared_atomic_imax; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_shared_atomic_umax; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) { + op = nir_intrinsic_shared_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) { + op = nir_intrinsic_shared_atomic_comp_swap; } else { unreachable("not reached"); } @@ -857,24 +889,12 @@ nir_visitor::visit(ir_call *ir) ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); assert(write_mask); - /* Check if we need the indirect version */ - ir_constant *const_offset = offset->as_constant(); - if (!const_offset) { - op = nir_intrinsic_store_ssbo_indirect; - ralloc_free(instr); - instr = nir_intrinsic_instr_create(shader, op); - instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = 0; - } else { - instr->const_index[0] = const_offset->value.u[0]; - } - - instr->const_index[1] = write_mask->value.u[0]; - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); + instr->const_index[0] = write_mask->value.u[0]; instr->num_components = val->type->vector_elements; - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); nir_builder_instr_insert(&b, &instr->instr); break; } @@ -885,20 +905,8 @@ nir_visitor::visit(ir_call *ir) param = param->get_next(); ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - /* Check if we need the indirect version */ - ir_constant *const_offset = offset->as_constant(); - if (!const_offset) { - op = 
nir_intrinsic_load_ssbo_indirect; - ralloc_free(instr); - instr = nir_intrinsic_instr_create(shader, op); - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = 0; - dest = &instr->dest; - } else { - instr->const_index[0] = const_offset->value.u[0]; - } - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); const glsl_type *type = ir->return_deref->var->type; instr->num_components = type->vector_elements; @@ -978,6 +986,84 @@ nir_visitor::visit(ir_call *ir) nir_builder_instr_insert(&b, &instr->instr); break; } + case nir_intrinsic_load_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + instr->const_index[0] = 0; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); + + const glsl_type *type = ir->return_deref->var->type; + instr->num_components = type->vector_elements; + + /* Setup destination register */ + nir_ssa_dest_init(&instr->instr, &instr->dest, + type->vector_elements, NULL); + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_store_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + + instr->const_index[0] = 0; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); + + instr->const_index[1] = write_mask->value.u[0]; + + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->num_components = val->type->vector_elements; + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: { + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* Offset */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data1 parameter (this is always present) */ + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data2 parameter (only with atomic_comp_swap) */ + if (param_count == 3) { + assert(op == nir_intrinsic_shared_atomic_comp_swap); + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[2] = + nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + } + + /* Atomic result */ + assert(ir->return_deref); + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } default: unreachable("not reached"); } @@ -1178,21 +1264,11 @@ nir_visitor::visit(ir_expression *ir) /* Some special cases */ switch (ir->operation) { case ir_binop_ubo_load: { - ir_constant *const_index = ir->operands[1]->as_constant(); - - nir_intrinsic_op op; - if (const_index) { - op = nir_intrinsic_load_ubo; - } else { - op = 
nir_intrinsic_load_ubo_indirect; - } - - nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op); + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); load->num_components = ir->type->vector_elements; - load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */ load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); - if (!const_index) - load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); + load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); add_instr(&load->instr, ir->type->vector_elements); /* diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp index 64b5c0cb106..bc8677ba6fc 100644 --- a/src/glsl/nir/glsl_types.cpp +++ b/src/glsl/nir/glsl_types.cpp @@ -22,7 +22,7 @@ */ #include <stdio.h> -#include "main/core.h" /* for Elements, MAX2 */ +#include "main/macros.h" #include "glsl_parser_extras.h" #include "glsl_types.h" #include "util/hash_table.h" diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 79df6d3df94..94bb76034a2 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -1382,13 +1382,13 @@ static inline bool foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state) { if (reverse) { - foreach_list_typed_safe_reverse(nir_cf_node, node, node, + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &if_stmt->else_list) { if (!foreach_cf_node(node, cb, reverse, state)) return false; } - foreach_list_typed_safe_reverse(nir_cf_node, node, node, + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &if_stmt->then_list) { if (!foreach_cf_node(node, cb, reverse, state)) return false; @@ -1412,7 +1412,7 @@ static inline bool foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state) { if (reverse) { - foreach_list_typed_safe_reverse(nir_cf_node, node, node, &loop->body) { + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &loop->body) { if (!foreach_cf_node(node, cb, reverse, state)) return false; } @@ -1472,7 +1472,7 @@ nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb, if (!cb(impl->end_block, state)) return false; - foreach_list_typed_safe_reverse(nir_cf_node, node, node, &impl->body) { + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &impl->body) { if (!foreach_cf_node(node, cb, true, state)) return false; } diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index b7374e17407..021c4280557 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1309,8 +1309,8 @@ nir_block_last_instr(nir_block *block) foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) #define nir_foreach_instr_safe(block, instr) \ foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) -#define nir_foreach_instr_safe_reverse(block, instr) \ - foreach_list_typed_safe_reverse(nir_instr, instr, node, &(block)->instr_list) +#define nir_foreach_instr_reverse_safe(block, instr) \ + foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) typedef struct nir_if { nir_cf_node cf_node; @@ -2018,7 +2018,7 @@ void nir_assign_var_locations(struct exec_list *var_list, void nir_lower_io(nir_shader *shader, nir_variable_mode mode, int (*type_size)(const struct glsl_type *)); -nir_src *nir_get_io_indirect_src(nir_intrinsic_instr *instr); +nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); void nir_lower_vars_to_ssa(nir_shader *shader); diff --git 
a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py index b16ef503c92..32784f6398d 100644 --- a/src/glsl/nir/nir_constant_expressions.py +++ b/src/glsl/nir/nir_constant_expressions.py @@ -32,14 +32,6 @@ template = """\ #include "util/half_float.h" #include "nir_constant_expressions.h" -#if defined(__SUNPRO_CC) -#include <ieeefp.h> -static int isnormal(double x) -{ - return fpclass(x) == FP_NORMAL; -} -#endif - /** * Evaluate one component of packSnorm4x8. */ diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index de30db61eea..5086e297e8e 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -222,6 +222,33 @@ INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0) +/* + * CS shared variable atomic intrinsics + * + * All of the shared variable atomic memory operations read a value from + * memory, compute a new value using one of the operations below, write the + * new value to memory, and return the original value read. + * + * All operations take 2 sources, except CompSwap, which takes 3. These + * sources represent: + * + * 0: The offset into the shared variable storage region that the atomic + * operation will operate on. + * 1: The data parameter to the atomic function (i.e. the value to add + * in shared_atomic_add, etc). + * 2: For CompSwap only: the second data parameter. + */ +INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + #define SYSTEM_VALUE(name, components, num_indices) \ INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) @@ -247,56 +274,62 @@ SYSTEM_VALUE(num_work_groups, 3, 0) SYSTEM_VALUE(helper_invocation, 1, 0) /* - * The format of the indices depends on the type of the load. For uniforms, - * the first index is the base address and the second index is an offset that - * should be added to the base address. (This way you can determine in the - * back-end which variable is being accessed even in an array.) For inputs, - * the one and only index corresponds to the attribute slot. UBO loads - * have two indices the first of which is the descriptor set and the second - * is the base address to load from. + * Load operations pull data from some piece of GPU memory. All load + * operations operate in terms of offsets into some piece of theoretical + * memory. Loads from externally visible memory (UBO and SSBO) simply take a + * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.) + * take a base+offset pair where the base (const_index[0]) gives the location + * of the start of the variable being loaded, and the offset source is an + * offset into that variable. * - * UBO loads have a (possibly constant) source which is the UBO buffer index.
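The source layout documented above for the shared atomics can be exercised directly with the nir_builder helpers this patch already uses. A minimal sketch, assuming b is an initialized nir_builder pointer and data is an existing 32-bit nir_ssa_def (both names are assumptions, not part of the patch):

   /* shared_atomic_add: src[0] is the offset into the shared storage
    * region, src[1] is the value to add; the single-component destination
    * receives the value that was in memory before the add. */
   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_shared_atomic_add);
   atomic->src[0] = nir_src_for_ssa(nir_imm_int(b, 16));   /* offset */
   atomic->src[1] = nir_src_for_ssa(data);                 /* data1 */
   nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
   nir_builder_instr_insert(b, &atomic->instr);

shared_atomic_comp_swap follows the same pattern, with the second data parameter in src[2].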
- * For each type of load, the _indirect variant has one additional source - * (the second in the case of UBO's) that is the is an indirect to be added to - * the constant address or base offset to compute the final offset. + * Some load operations such as UBO/SSBO load and per_vertex loads take an + * additional source to specify which UBO/SSBO/vertex to load from. * - * For vector backends, the address is in terms of one vec4, and so each array - * element is +4 scalar components from the previous array element. For scalar - * backends, the address is in terms of a single 4-byte float/int and arrays - * elements begin immediately after the previous array element. + * The exact address type depends on the lowering pass that generates the + * load/store intrinsics. Typically, this is vec4 units for things such as + * varying slots and float units for fragment shader inputs. UBO and SSBO + * offsets are always in bytes. */ -#define LOAD(name, extra_srcs, indices, flags) \ - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, indices, flags) \ - INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ - true, 0, 0, indices, flags) +#define LOAD(name, srcs, indices, flags) \ + INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags) -LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) -LOAD(output, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE) -LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) -LOAD(push_constant, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { buffer_index, offset }. No const_index */ +LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { vertex, offset }. const_index[] = { base } */ +LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { buffer_index, offset }. No const_index */ +LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { vertex, offset }. const_index[] = { base } */ +LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base, size } */ +LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* - * Stores work the same way as loads, except now the first register input is - * the value or array to store and the optional second input is the indirect - * offset. SSBO stores are similar, but they accept an extra source for the - * block index and an extra index with the writemask to use. + * Stores work the same way as loads, except now the first source is the value + * to store and the second (and possibly third) source specifies where to store + * the value. SSBO and shared memory stores also have a write mask as + * const_index[0].
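Read as emission recipes, these table entries pair one-to-one with builder code. A sketch of a four-component store_ssbo under the layout above, assuming b is a nir_builder pointer and val (a vec4 value), block and offset are nir_ssa_def pointers already in scope (all hypothetical names):

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_ssbo);
   store->num_components = 4;
   store->const_index[0] = 0xf;                /* write mask: all components */
   store->src[0] = nir_src_for_ssa(val);       /* value */
   store->src[1] = nir_src_for_ssa(block);     /* block_index */
   store->src[2] = nir_src_for_ssa(offset);    /* byte offset */
   nir_builder_instr_insert(b, &store->instr);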
*/ -#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \ - INTRINSIC(store_##name, 1 + extra_srcs, \ - ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \ - false, 0, 0, 1 + extra_indices, flags) \ - INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \ - ARR(0, 1, extra_srcs_size, extra_srcs_size), \ - false, 0, 0, 1 + extra_indices, flags) +#define STORE(name, srcs, indices, flags) \ + INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags) -STORE(output, 0, 0, 0, 0) -STORE(per_vertex_output, 1, 1, 0, 0) -STORE(ssbo, 1, 1, 1, 0) +/* src[] = { value, offset }. const_index[] = { base } */ +STORE(output, 2, 1, 0) +/* src[] = { value, vertex, offset }. const_index[] = { base } */ +STORE(per_vertex_output, 3, 1, 0) +/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ +STORE(ssbo, 3, 1, 0) +/* src[] = { value, offset }. const_index[] = { base, write_mask } */ +STORE(shared, 2, 1, 0) -LAST_INTRINSIC(store_ssbo_indirect) +LAST_INTRINSIC(store_shared) diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c index c58c7785b3f..e2a2bb689a8 100644 --- a/src/glsl/nir/nir_lower_clip.c +++ b/src/glsl/nir/nir_lower_clip.c @@ -74,6 +74,7 @@ store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val) store->const_index[0] = out->data.driver_location; store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]); store->src[0].is_ssa = true; + store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_builder_instr_insert(b, &store->instr); } @@ -85,6 +86,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val) load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); load->num_components = 4; load->const_index[0] = in->data.driver_location; + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_builder_instr_insert(b, &load->instr); @@ -112,6 +114,7 @@ find_output_in_block(nir_block *block, void *void_state) intr->const_index[0] == state->drvloc) { assert(state->def == NULL); assert(intr->src[0].is_ssa); + assert(nir_src_as_const_value(intr->src[1])); state->def = intr->src[0].ssa; #if !defined(DEBUG) diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 5683e69d865..ec6d09d5b6d 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -86,18 +86,11 @@ is_per_vertex_output(struct lower_io_state *state, nir_variable *var) stage == MESA_SHADER_TESS_CTRL; } -static unsigned -get_io_offset(nir_deref_var *deref, nir_instr *instr, +static nir_ssa_def * +get_io_offset(nir_builder *b, nir_deref_var *deref, nir_ssa_def **vertex_index, - nir_ssa_def **out_indirect, - struct lower_io_state *state) + int (*type_size)(const struct glsl_type *)) { - nir_ssa_def *indirect = NULL; - unsigned base_offset = 0; - - nir_builder *b = &state->builder; - b->cursor = nir_before_instr(instr); - nir_deref *tail = &deref->deref; /* For per-vertex input arrays (i.e. 
geometry shader inputs), keep the @@ -115,64 +108,57 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, *vertex_index = vtx; } + /* Just emit code and let constant-folding go to town */ + nir_ssa_def *offset = nir_imm_int(b, 0); + while (tail->child != NULL) { const struct glsl_type *parent_type = tail->type; tail = tail->child; if (tail->deref_type == nir_deref_type_array) { nir_deref_array *deref_array = nir_deref_as_array(tail); - unsigned size = state->type_size(tail->type); + unsigned size = type_size(tail->type); - base_offset += size * deref_array->base_offset; + offset = nir_iadd(b, offset, + nir_imm_int(b, size * deref_array->base_offset)); if (deref_array->deref_array_type == nir_deref_array_type_indirect) { nir_ssa_def *mul = nir_imul(b, nir_imm_int(b, size), nir_ssa_for_src(b, deref_array->indirect, 1)); - indirect = indirect ? nir_iadd(b, indirect, mul) : mul; + offset = nir_iadd(b, offset, mul); } } else if (tail->deref_type == nir_deref_type_struct) { nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + unsigned field_offset = 0; for (unsigned i = 0; i < deref_struct->index; i++) { - base_offset += - state->type_size(glsl_get_struct_field(parent_type, i)); + field_offset += type_size(glsl_get_struct_field(parent_type, i)); } + offset = nir_iadd(b, offset, nir_imm_int(b, field_offset)); } } - *out_indirect = indirect; - return base_offset; + return offset; } static nir_intrinsic_op load_op(struct lower_io_state *state, - nir_variable_mode mode, bool per_vertex, bool has_indirect) + nir_variable_mode mode, bool per_vertex) { nir_intrinsic_op op; switch (mode) { case nir_var_shader_in: - if (per_vertex) { - op = has_indirect ? nir_intrinsic_load_per_vertex_input_indirect : - nir_intrinsic_load_per_vertex_input; - } else { - op = has_indirect ? nir_intrinsic_load_input_indirect : - nir_intrinsic_load_input; - } + op = per_vertex ? nir_intrinsic_load_per_vertex_input : + nir_intrinsic_load_input; break; case nir_var_shader_out: - if (per_vertex) { - op = has_indirect ? nir_intrinsic_load_per_vertex_output_indirect : - nir_intrinsic_load_per_vertex_output; - } else { - op = has_indirect ? nir_intrinsic_load_output_indirect : - nir_intrinsic_load_output; - } + op = per_vertex ? nir_intrinsic_load_per_vertex_output : + nir_intrinsic_load_output; break; case nir_var_uniform: - op = has_indirect ? nir_intrinsic_load_uniform_indirect : - nir_intrinsic_load_uniform; + op = nir_intrinsic_load_uniform; break; default: unreachable("Unknown variable mode"); @@ -185,6 +171,8 @@ nir_lower_io_block(nir_block *block, void *void_state) { struct lower_io_state *state = void_state; + nir_builder *b = &state->builder; + nir_foreach_instr_safe(block, instr) { if (instr->type != nir_instr_type_intrinsic) continue; @@ -205,38 +193,33 @@ nir_lower_io_block(nir_block *block, void *void_state) mode != nir_var_uniform) continue; + b->cursor = nir_before_instr(instr); + switch (intrin->intrinsic) { case nir_intrinsic_load_var: { bool per_vertex = is_per_vertex_input(state, intrin->variables[0]->var) || is_per_vertex_output(state, intrin->variables[0]->var); - nir_ssa_def *indirect; + nir_ssa_def *offset; nir_ssa_def *vertex_index; - unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, - per_vertex ? &vertex_index : NULL, - &indirect, state); + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? 
&vertex_index : NULL, + state->type_size); nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, - load_op(state, mode, per_vertex, - indirect)); + load_op(state, mode, per_vertex)); load->num_components = intrin->num_components; - unsigned location = intrin->variables[0]->var->data.driver_location; - if (mode == nir_var_uniform) { - load->const_index[0] = location; - load->const_index[1] = offset; - } else { - load->const_index[0] = location + offset; - } + load->const_index[0] = + intrin->variables[0]->var->data.driver_location; if (per_vertex) load->src[0] = nir_src_for_ssa(vertex_index); - if (indirect) - load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect); + load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(offset); if (intrin->dest.is_ssa) { nir_ssa_dest_init(&load->instr, &load->dest, @@ -255,38 +238,33 @@ case nir_intrinsic_store_var: { assert(mode == nir_var_shader_out); - nir_ssa_def *indirect; + nir_ssa_def *offset; nir_ssa_def *vertex_index; bool per_vertex = is_per_vertex_output(state, intrin->variables[0]->var); - unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, - per_vertex ? &vertex_index : NULL, - &indirect, state); - offset += intrin->variables[0]->var->data.driver_location; + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? &vertex_index : NULL, + state->type_size); - nir_intrinsic_op store_op; - if (per_vertex) { - store_op = indirect ? nir_intrinsic_store_per_vertex_output_indirect - : nir_intrinsic_store_per_vertex_output; - } else { - store_op = indirect ? nir_intrinsic_store_output_indirect - : nir_intrinsic_store_output; - } + nir_intrinsic_op store_op = + per_vertex ? nir_intrinsic_store_per_vertex_output : + nir_intrinsic_store_output; nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, store_op); store->num_components = intrin->num_components; - store->const_index[0] = offset; nir_src_copy(&store->src[0], &intrin->src[0], store); + store->const_index[0] = + intrin->variables[0]->var->data.driver_location; + if (per_vertex) store->src[1] = nir_src_for_ssa(vertex_index); - if (indirect) - store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(indirect); + store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset); nir_instr_insert_before(&intrin->instr, &store->instr); nir_instr_remove(&intrin->instr); @@ -330,21 +308,24 @@ } /** - * Return the indirect source for a load/store indirect intrinsic. + * Return the offset source for a load/store intrinsic.
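Because get_io_offset now always produces an SSA offset, a backend that still wants the old direct/indirect distinction can recover it after constant folding, since constant offsets become immediates. A sketch, assuming intrin points at one of the lowered load/store intrinsics:

   nir_src *offset_src = nir_get_io_offset_src(intrin);
   nir_const_value *const_offset = nir_src_as_const_value(*offset_src);
   if (const_offset) {
      /* Direct access: base in const_index[0] plus const_offset->u[0]. */
   } else {
      /* Indirect access: apply the offset SSA value at run time. */
   }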
*/ nir_src * -nir_get_io_indirect_src(nir_intrinsic_instr *instr) +nir_get_io_offset_src(nir_intrinsic_instr *instr) { switch (instr->intrinsic) { - case nir_intrinsic_load_input_indirect: - case nir_intrinsic_load_output_indirect: - case nir_intrinsic_load_uniform_indirect: + case nir_intrinsic_load_input: + case nir_intrinsic_load_output: + case nir_intrinsic_load_uniform: return &instr->src[0]; - case nir_intrinsic_load_per_vertex_input_indirect: - case nir_intrinsic_load_per_vertex_output_indirect: - case nir_intrinsic_store_output_indirect: + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_store_output: return &instr->src[1]; - case nir_intrinsic_store_per_vertex_output_indirect: + case nir_intrinsic_store_ssbo: + case nir_intrinsic_store_per_vertex_output: return &instr->src[2]; default: return NULL; @@ -360,11 +341,8 @@ nir_get_io_vertex_index_src(nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_vertex_output: - case nir_intrinsic_load_per_vertex_input_indirect: - case nir_intrinsic_load_per_vertex_output_indirect: return &instr->src[0]; case nir_intrinsic_store_per_vertex_output: - case nir_intrinsic_store_per_vertex_output_indirect: return &instr->src[1]; default: return NULL; diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c index aa124d9e6cc..2f5927f6406 100644 --- a/src/glsl/nir/nir_lower_phis_to_scalar.c +++ b/src/glsl/nir/nir_lower_phis_to_scalar.c @@ -91,13 +91,9 @@ is_phi_src_scalarizable(nir_phi_src *src, case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: case nir_intrinsic_load_uniform: - case nir_intrinsic_load_uniform_indirect: case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ubo_indirect: case nir_intrinsic_load_ssbo: - case nir_intrinsic_load_ssbo_indirect: case nir_intrinsic_load_input: - case nir_intrinsic_load_input_indirect: return true; default: break; diff --git a/src/glsl/nir/nir_lower_samplers.c b/src/glsl/nir/nir_lower_samplers.c index 19deafab37a..858088237e3 100644 --- a/src/glsl/nir/nir_lower_samplers.c +++ b/src/glsl/nir/nir_lower_samplers.c @@ -25,7 +25,6 @@ #include "nir.h" #include "nir_builder.h" -#include "../program.h" #include "program/hash_table.h" #include "ir_uniform.h" diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c index 6995b9d6bc1..7df12e070f1 100644 --- a/src/glsl/nir/nir_lower_two_sided_color.c +++ b/src/glsl/nir/nir_lower_two_sided_color.c @@ -73,6 +73,7 @@ load_input(nir_builder *b, nir_variable *in) load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); load->num_components = 4; load->const_index[0] = in->data.driver_location; + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_builder_instr_insert(b, &load->instr); @@ -151,6 +152,7 @@ nir_lower_two_sided_color_block(nir_block *block, void *void_state) unsigned drvloc = state->colors[idx].front->data.driver_location; if (intr->const_index[0] == drvloc) { + assert(nir_src_as_const_value(intr->src[0])); break; } } diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 30ede52b146..3843f21c0ee 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -185,8 +185,10 @@ optimizations = [ (('fsqrt', a), ('frcp', ('frsq', a)), 
'options->lower_fsqrt'), (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), # Boolean simplifications - (('ine', 'a@bool', 0), 'a'), - (('ieq', 'a@bool', 0), ('inot', 'a')), + (('ieq', 'a@bool', True), a), + (('ine', 'a@bool', True), ('inot', a)), + (('ine', 'a@bool', False), a), + (('ieq', 'a@bool', False), ('inot', 'a')), (('bcsel', a, True, False), ('ine', a, 0)), (('bcsel', a, False, True), ('ieq', a, 0)), (('bcsel', True, b, c), b), diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 76bfc47c2a0..10f46cef1de 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -439,21 +439,15 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) switch (instr->intrinsic) { case nir_intrinsic_load_uniform: - case nir_intrinsic_load_uniform_indirect: var_list = &state->shader->uniforms; break; case nir_intrinsic_load_input: - case nir_intrinsic_load_input_indirect: case nir_intrinsic_load_per_vertex_input: - case nir_intrinsic_load_per_vertex_input_indirect: var_list = &state->shader->inputs; break; case nir_intrinsic_load_output: - case nir_intrinsic_load_output_indirect: case nir_intrinsic_store_output: - case nir_intrinsic_store_output_indirect: case nir_intrinsic_store_per_vertex_output: - case nir_intrinsic_store_per_vertex_output_indirect: var_list = &state->shader->outputs; break; default: diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index d014f3cd811..68edea09309 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1112,8 +1112,7 @@ nir_vulkan_resource_index(nir_builder *b, unsigned set, unsigned binding, static struct vtn_ssa_value * _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, unsigned set, unsigned binding, nir_variable_mode mode, - nir_ssa_def *index, unsigned offset, nir_ssa_def *indirect, - struct vtn_type *type) + nir_ssa_def *index, nir_ssa_def *offset, struct vtn_type *type) { struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); val->type = type->type; @@ -1121,26 +1120,20 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, if (glsl_type_is_vector_or_scalar(type->type)) { nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); load->num_components = glsl_get_vector_elements(type->type); - load->const_index[0] = offset; switch (op) { - case nir_intrinsic_load_ubo_indirect: - case nir_intrinsic_load_ssbo_indirect: - load->src[1] = nir_src_for_ssa(indirect); - /* fall through */ case nir_intrinsic_load_ubo: case nir_intrinsic_load_ssbo: { nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, set, binding, mode, index); load->src[0] = nir_src_for_ssa(res_index); + load->src[1] = nir_src_for_ssa(offset); break; } case nir_intrinsic_load_push_constant: - break; /* Nothing to do */ - case nir_intrinsic_load_push_constant_indirect: - load->src[0] = nir_src_for_ssa(indirect); + load->src[0] = nir_src_for_ssa(offset); break; default: @@ -1155,15 +1148,17 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); if (glsl_type_is_struct(type->type)) { for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, - offset + type->offsets[i], - indirect, type->members[i]); + child_offset, type->members[i]); } } else { for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, 
nir_imm_int(&b->nb, i * type->stride)); val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, - offset + i * type->stride, - indirect, type->array_element); + child_offset, type->array_element); } } } @@ -1174,8 +1169,7 @@ static void vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src, struct vtn_type **type, nir_deref *src_tail, - nir_ssa_def **index, - unsigned *offset, nir_ssa_def **indirect) + nir_ssa_def **index, nir_ssa_def **offset) { nir_deref *deref = &src->deref; @@ -1191,27 +1185,30 @@ vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src, *index = nir_imm_int(&b->nb, 0); } - *offset = 0; - *indirect = NULL; + *offset = nir_imm_int(&b->nb, 0); while (deref != src_tail) { deref = deref->child; switch (deref->deref_type) { case nir_deref_type_array: { nir_deref_array *deref_array = nir_deref_as_array(deref); - if (deref_array->deref_array_type == nir_deref_array_type_direct) { - *offset += (*type)->stride * deref_array->base_offset; - } else { - nir_ssa_def *off = nir_imul(&b->nb, deref_array->indirect.ssa, - nir_imm_int(&b->nb, (*type)->stride)); - *indirect = *indirect ? nir_iadd(&b->nb, *indirect, off) : off; - } + nir_ssa_def *off = nir_imm_int(&b->nb, deref_array->base_offset); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) + off = nir_iadd(&b->nb, off, deref_array->indirect.ssa); + + off = nir_imul(&b->nb, off, nir_imm_int(&b->nb, (*type)->stride)); + *offset = nir_iadd(&b->nb, *offset, off); + *type = (*type)->array_element; break; } case nir_deref_type_struct: { nir_deref_struct *deref_struct = nir_deref_as_struct(deref); - *offset += (*type)->offsets[deref_struct->index]; + + unsigned elem_off = (*type)->offsets[deref_struct->index]; + *offset = nir_iadd(&b->nb, *offset, nir_imm_int(&b->nb, elem_off)); + *type = (*type)->members[deref_struct->index]; break; } @@ -1227,9 +1224,8 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, struct vtn_type *type, nir_deref *src_tail) { nir_ssa_def *index; - unsigned offset; - nir_ssa_def *indirect; - vtn_block_get_offset(b, src, &type, src_tail, &index, &offset, &indirect); + nir_ssa_def *offset; + vtn_block_get_offset(b, src, &type, src_tail, &index, &offset); nir_intrinsic_op op; if (src->var->data.mode == nir_var_uniform) { @@ -1237,25 +1233,22 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, /* UBO load */ assert(src->var->data.binding >= 0); - op = indirect ? nir_intrinsic_load_ubo_indirect - : nir_intrinsic_load_ubo; + op = nir_intrinsic_load_ubo; } else { /* Push constant load */ assert(src->var->data.descriptor_set == -1 && src->var->data.binding == -1); - op = indirect ? nir_intrinsic_load_push_constant_indirect - : nir_intrinsic_load_push_constant; + op = nir_intrinsic_load_push_constant; } } else { assert(src->var->data.mode == nir_var_shader_storage); - op = indirect ?
nir_intrinsic_load_ssbo_indirect - : nir_intrinsic_load_ssbo; + op = nir_intrinsic_load_ssbo; } return _vtn_block_load(b, op, src->var->data.descriptor_set, src->var->data.binding, src->var->data.mode, - index, offset, indirect, type); + index, offset, type); } /* @@ -1319,14 +1312,13 @@ vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, static void _vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, struct vtn_ssa_value *src, unsigned set, unsigned binding, - nir_variable_mode mode, nir_ssa_def *index, unsigned offset, - nir_ssa_def *indirect, struct vtn_type *type) + nir_variable_mode mode, nir_ssa_def *index, + nir_ssa_def *offset, struct vtn_type *type) { assert(src->type == type->type); if (glsl_type_is_vector_or_scalar(type->type)) { nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); store->num_components = glsl_get_vector_elements(type->type); - store->const_index[0] = offset; store->const_index[1] = (1 << store->num_components) - 1; store->src[0] = nir_src_for_ssa(src->def); @@ -1334,24 +1326,24 @@ _vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, set, binding, mode, index); store->src[1] = nir_src_for_ssa(res_index); - - if (op == nir_intrinsic_store_ssbo_indirect) - store->src[2] = nir_src_for_ssa(indirect); + store->src[2] = nir_src_for_ssa(offset); nir_builder_instr_insert(&b->nb, &store->instr); } else { unsigned elems = glsl_get_length(type->type); if (glsl_type_is_struct(type->type)) { for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); _vtn_block_store(b, op, src->elems[i], set, binding, mode, - index, offset + type->offsets[i], indirect, - type->members[i]); + index, child_offset, type->members[i]); } } else { for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); _vtn_block_store(b, op, src->elems[i], set, binding, mode, - index, offset + i * type->stride, indirect, - type->array_element); + index, child_offset, type->array_element); } } } @@ -1363,16 +1355,14 @@ vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, nir_deref *dest_tail) { nir_ssa_def *index; - unsigned offset; - nir_ssa_def *indirect; - vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset, &indirect); + nir_ssa_def *offset; + vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset); - nir_intrinsic_op op = indirect ? 
nir_intrinsic_store_ssbo_indirect - : nir_intrinsic_store_ssbo; + nir_intrinsic_op op = nir_intrinsic_store_ssbo; return _vtn_block_store(b, op, src, dest->var->data.descriptor_set, dest->var->data.binding, dest->var->data.mode, - index, offset, indirect, type); + index, offset, type); } static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, @@ -1545,7 +1535,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, /* We have exactly one push constant block */ assert(b->shader->num_uniforms == 0); - b->shader->num_uniforms = vtn_type_block_size(type); + b->shader->num_uniforms = vtn_type_block_size(type) * 4; break; case SpvStorageClassInput: var->data.mode = nir_var_shader_in; diff --git a/src/glsl/opt_array_splitting.cpp b/src/glsl/opt_array_splitting.cpp index 9e73f3c44bb..89ce76bed2b 100644 --- a/src/glsl/opt_array_splitting.cpp +++ b/src/glsl/opt_array_splitting.cpp @@ -188,6 +188,10 @@ ir_array_reference_visitor::visit_enter(ir_dereference_array *ir) if (entry && !ir->array_index->as_constant()) entry->split = false; + /* If the index is itself an array dereference, visit the index. */ + if (ir->array_index->as_dereference_array()) + visit_enter(ir->array_index->as_dereference_array()); + return visit_continue_with_parent; } diff --git a/src/glsl/opt_constant_propagation.cpp b/src/glsl/opt_constant_propagation.cpp index 184aaa1c297..fb24a4fad04 100644 --- a/src/glsl/opt_constant_propagation.cpp +++ b/src/glsl/opt_constant_propagation.cpp @@ -500,7 +500,8 @@ ir_constant_propagation_visitor::add_constant(ir_assignment *ir) * the variable value isn't modified between this assignment and the next * instruction where its value is read. */ - if (deref->var->data.mode == ir_var_shader_storage) + if (deref->var->data.mode == ir_var_shader_storage || + deref->var->data.mode == ir_var_shader_shared) return; entry = new(this->mem_ctx) acp_entry(deref->var, ir->write_mask, constant); diff --git a/src/glsl/opt_constant_variable.cpp b/src/glsl/opt_constant_variable.cpp index cdfbc340243..56f6a819e1e 100644 --- a/src/glsl/opt_constant_variable.cpp +++ b/src/glsl/opt_constant_variable.cpp @@ -120,7 +120,8 @@ ir_constant_variable_visitor::visit_enter(ir_assignment *ir) * and we can't be sure that this variable won't be written by another * thread.
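The reasoning is the same in each of these optimization passes: SSBO and compute shared storage are visible to other invocations, which may write them between any two instructions, so a value stored there must never be remembered as known. The test the passes now share boils down to this predicate (hypothetical helper, not part of the patch):

   static bool
   storage_may_be_written_externally(const ir_variable *var)
   {
      return var->data.mode == ir_var_shader_storage ||
             var->data.mode == ir_var_shader_shared;
   }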
*/ - if (var->data.mode == ir_var_shader_storage) + if (var->data.mode == ir_var_shader_storage || + var->data.mode == ir_var_shader_shared) return visit_continue; constval = ir->rhs->constant_expression_value(); diff --git a/src/glsl/opt_copy_propagation.cpp b/src/glsl/opt_copy_propagation.cpp index f20699563fd..5d4cb4fe613 100644 --- a/src/glsl/opt_copy_propagation.cpp +++ b/src/glsl/opt_copy_propagation.cpp @@ -330,7 +330,8 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir) */ ir->condition = new(ralloc_parent(ir)) ir_constant(false); this->progress = true; - } else if (lhs_var->data.mode != ir_var_shader_storage) { + } else if (lhs_var->data.mode != ir_var_shader_storage && + lhs_var->data.mode != ir_var_shader_shared) { entry = new(this->acp) acp_entry(lhs_var, rhs_var); this->acp->push_tail(entry); } diff --git a/src/glsl/opt_dead_builtin_varyings.cpp b/src/glsl/opt_dead_builtin_varyings.cpp index 68b70eedf92..53871130e12 100644 --- a/src/glsl/opt_dead_builtin_varyings.cpp +++ b/src/glsl/opt_dead_builtin_varyings.cpp @@ -85,7 +85,7 @@ public: { ir_variable *var = ir->variable_referenced(); - if (!var || var->data.mode != this->mode) + if (!var || var->data.mode != this->mode || !var->type->is_array()) return visit_continue; if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) { diff --git a/src/glsl/opt_dead_code.cpp b/src/glsl/opt_dead_code.cpp index c5be166e75a..c2ce0b94ece 100644 --- a/src/glsl/opt_dead_code.cpp +++ b/src/glsl/opt_dead_code.cpp @@ -75,6 +75,20 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned) || !entry->declaration) continue; + /* Section 7.4.1 (Shader Interface Matching) of the OpenGL 4.5 + * (Core Profile) spec says: + * + * "With separable program objects, interfaces between shader + * stages may involve the outputs from one program object and the + * inputs from a second program object. For such interfaces, it is + * not possible to detect mismatches at link time, because the + * programs are linked separately. When each such program is + * linked, all inputs or outputs interfacing with another program + * stage are treated as active." + */ + if (entry->var->data.always_active_io) + continue; + if (!entry->assign_list.is_empty()) { /* Remove all the dead assignments to the variable we found. * Don't do so if it's a shader or function output, though. diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp index 7d59c787aed..84266b0cb58 100644 --- a/src/glsl/standalone_scaffolding.cpp +++ b/src/glsl/standalone_scaffolding.cpp @@ -69,7 +69,7 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, void _mesa_shader_debug(struct gl_context *, GLenum, GLuint *, - const char *, int) + const char *) { } diff --git a/src/glsl/standalone_scaffolding.h b/src/glsl/standalone_scaffolding.h index a9ca5e4e3d3..f853a187bf4 100644 --- a/src/glsl/standalone_scaffolding.h +++ b/src/glsl/standalone_scaffolding.h @@ -52,7 +52,7 @@ _mesa_clear_shader_program_data(struct gl_shader_program *); extern "C" void _mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id, - const char *msg, int len); + const char *msg); static inline gl_shader_stage _mesa_shader_enum_to_shader_stage(GLenum v)
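A closing note on the boolean rules added to nir_opt_algebraic.py above: they rely on NIR representing true as ~0 and false as 0, so comparing a boolean against a constant is either the identity or a bitwise not. A standalone sanity check of all four rules (a sketch, not part of the patch):

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      const uint32_t T = ~0u, F = 0u;   /* NIR_TRUE / NIR_FALSE */
      for (int i = 0; i < 2; i++) {
         const uint32_t a = i ? T : F;
         assert((a == T ? T : F) == a);              /* ieq(a, true)  -> a       */
         assert((a != T ? T : F) == (uint32_t)~a);   /* ine(a, true)  -> inot(a) */
         assert((a != F ? T : F) == a);              /* ine(a, false) -> a       */
         assert((a == F ? T : F) == (uint32_t)~a);   /* ieq(a, false) -> inot(a) */
      }
      return 0;
   }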