26 files changed, 351 insertions, 203 deletions
diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index 1396758b7c2..2e180a545ae 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -145,8 +145,8 @@ libglsl_la_SOURCES =					\
 	$(LIBGLSL_FILES)				\
 	$(NIR_FILES)					\
 	$(SPIRV_FILES)					\
-	$(NIR_GENERATED_FILES)
-
+	$(NIR_GENERATED_FILES)				\
+	$(GLSL_TO_NIR_FILES)
 
 libnir_la_SOURCES =					\
 	$(NIR_FILES)					\
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 89113bcd1c6..80d21a0718d 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -18,8 +18,6 @@ NIR_GENERATED_FILES = \
 	nir/nir_opt_algebraic.c
 
 NIR_FILES = \
-	nir/glsl_to_nir.cpp \
-	nir/glsl_to_nir.h \
 	nir/glsl_types.cpp \
 	nir/glsl_types.h \
 	nir/builtin_type_macros.h \
@@ -225,6 +223,11 @@ LIBGLSL_FILES = \
 	s_expression.cpp \
 	s_expression.h
 
+# glsl to nir pass
+GLSL_TO_NIR_FILES = \
+	nir/glsl_to_nir.cpp \
+	nir/glsl_to_nir.h
+
 # glsl_compiler
 
 GLSL_COMPILER_CXX_FILES = \
diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index 70bf5b09c3c..a9d38c163b7 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -65,6 +65,7 @@ for l in ('LIBGLCPP_FILES', 'LIBGLSL_FILES'):
 # XXX: Remove this once we build NIR and NIR_FILES.
 glsl_sources += [
     'nir/glsl_types.cpp',
+    'nir/shader_enums.c',
 ]
 
 if env['msvc']:
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 376dfda2c84..b5e08c048c5 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -3106,7 +3106,7 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
                _mesa_glsl_error(loc, state,
                                 "misaligned atomic counter offset");
 
-            var->data.atomic.offset = *offset;
+            var->data.offset = *offset;
             *offset += var->type->atomic_size();
 
          } else {
@@ -3518,7 +3518,7 @@ get_variable_being_redeclared(ir_variable *var, YYLTYPE loc,
               state->is_version(150, 0))
               && strcmp(var->name, "gl_FragCoord") == 0
               && earlier->type == var->type
-              && earlier->data.mode == var->data.mode) {
+              && var->data.mode == ir_var_shader_in) {
       /* Allow redeclaration of gl_FragCoord for ARB_fcc layout
        * qualifiers.
        */
@@ -6170,7 +6170,7 @@ ast_type_specifier::hir(exec_list *instructions,
  * The number of fields processed.  A pointer to the array structure fields is
  * stored in \c *fields_ret.
  */
-unsigned
+static unsigned
 ast_process_struct_or_iface_block_members(exec_list *instructions,
                                           struct _mesa_glsl_parse_state *state,
                                           exec_list *declarations,
@@ -6376,12 +6376,13 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
             if (process_qualifier_constant(state, &loc, "location",
                                            qual->location, &qual_location)) {
                fields[i].location = VARYING_SLOT_VAR0 + qual_location;
-               expl_location = fields[i].location + 1;
+               expl_location = fields[i].location +
+                  fields[i].type->count_attribute_slots(false);
             }
          } else {
             if (layout && layout->flags.q.explicit_location) {
                fields[i].location = expl_location;
-               expl_location = expl_location + 1;
+               expl_location += fields[i].type->count_attribute_slots(false);
             } else {
                fields[i].location = -1;
             }
@@ -6485,7 +6486,7 @@ ast_struct_specifier::hir(exec_list *instructions,
 
    state->struct_specifier_depth++;
 
-   unsigned expl_location = -1;
+   unsigned expl_location = 0;
    if (layout && layout->flags.q.explicit_location) {
       if (!process_qualifier_constant(state, &loc, "location",
                                       layout->location, &expl_location)) {
@@ -6672,7 +6673,7 @@ ast_interface_block::hir(exec_list *instructions,
       return NULL;
    }
 
-   unsigned expl_location = -1;
+   unsigned expl_location = 0;
    if (layout.flags.q.explicit_location) {
       if (!process_qualifier_constant(state, &loc, "location",
                                       layout.location, &expl_location)) {
diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index e82c99ee3bb..221aab0043b 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -1057,8 +1057,16 @@ builtin_variable_generator::generate_fs_special_vars()
 {
    ir_variable *var;
 
-   add_input(VARYING_SLOT_POS, vec4_t, "gl_FragCoord");
-   add_input(VARYING_SLOT_FACE, bool_t, "gl_FrontFacing");
+   if (this->state->ctx->Const.GLSLFragCoordIsSysVal)
+      add_system_value(SYSTEM_VALUE_FRAG_COORD, vec4_t, "gl_FragCoord");
+   else
+      add_input(VARYING_SLOT_POS, vec4_t, "gl_FragCoord");
+
+   if (this->state->ctx->Const.GLSLFrontFacingIsSysVal)
+      add_system_value(SYSTEM_VALUE_FRONT_FACE, bool_t, "gl_FrontFacing");
+   else
+      add_input(VARYING_SLOT_FACE, bool_t, "gl_FrontFacing");
+
    if (state->is_version(120, 100))
       add_input(VARYING_SLOT_PNTC, vec2_t, "gl_PointCoord");
 
diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 2fd4cf04079..ef1a6575aaa 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2506,6 +2506,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 
               if (extensions->ARB_shader_subroutine)
                  add_builtin_define(parser, "GL_ARB_shader_subroutine", 1);
+
+              if (extensions->ARB_shader_draw_parameters)
+                 add_builtin_define(parser, "GL_ARB_shader_draw_parameters", 1);
 	   }
 	}
 
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index fb53f6e2e2d..131a5641f8f 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -414,44 +414,6 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version,
 }
 
 
-/**
- * Translate a gl_shader_stage to a short shader stage name for debug
- * printouts and error messages.
- */
-const char *
-_mesa_shader_stage_to_string(unsigned stage)
-{
-   switch (stage) {
-   case MESA_SHADER_VERTEX:   return "vertex";
-   case MESA_SHADER_FRAGMENT: return "fragment";
-   case MESA_SHADER_GEOMETRY: return "geometry";
-   case MESA_SHADER_COMPUTE:  return "compute";
-   case MESA_SHADER_TESS_CTRL: return "tess ctrl";
-   case MESA_SHADER_TESS_EVAL: return "tess eval";
-   }
-
-   unreachable("Unknown shader stage.");
-}
-
-/**
- * Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS)
- * for debug printouts and error messages.
- */
-const char *
-_mesa_shader_stage_to_abbrev(unsigned stage)
-{
-   switch (stage) {
-   case MESA_SHADER_VERTEX:   return "VS";
-   case MESA_SHADER_FRAGMENT: return "FS";
-   case MESA_SHADER_GEOMETRY: return "GS";
-   case MESA_SHADER_COMPUTE:  return "CS";
-   case MESA_SHADER_TESS_CTRL: return "TCS";
-   case MESA_SHADER_TESS_EVAL: return "TES";
-   }
-
-   unreachable("Unknown shader stage.");
-}
-
 /* This helper function will append the given message to the shader's
    info log and report it via GL_ARB_debug_output. Per that extension,
    'type' is one of the enum values classifying the message, and
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index afb99afa5cd..ecc29920918 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -731,16 +731,6 @@ extern bool _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp,
 extern "C" {
 #endif
 
-/**
- * Get the textual name of the specified shader stage (which is a
- * gl_shader_stage).
- */
-extern const char *
-_mesa_shader_stage_to_string(unsigned stage);
-
-extern const char *
-_mesa_shader_stage_to_abbrev(unsigned stage);
-
 extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log,
                       const struct gl_extensions *extensions, struct gl_context *gl_ctx);
 
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 70227070ca7..d82bccd5d2f 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1674,7 +1674,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
    this->data.mode = mode;
    this->data.interpolation = INTERP_QUALIFIER_NONE;
    this->data.max_array_access = 0;
-   this->data.atomic.offset = 0;
+   this->data.offset = 0;
    this->data.precision = GLSL_PRECISION_NONE;
    this->data.image_read_only = false;
    this->data.image_write_only = false;
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 159f94d9edd..a728c036e6b 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -871,9 +871,7 @@ public:
       /**
        * Location an atomic counter is stored at.
        */
-      struct {
-         unsigned offset;
-      } atomic;
+      unsigned offset;
 
       /**
        * Highest element accessed with a constant expression array index
@@ -1726,6 +1724,8 @@ public:
       return operation == ir_binop_all_equal ||
              operation == ir_binop_any_nequal ||
              operation == ir_binop_dot ||
+             operation == ir_binop_vector_extract ||
+             operation == ir_triop_vector_insert ||
              operation == ir_quadop_vector;
    }
 
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 5bf5ce54f78..f02e959bd18 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -1824,9 +1824,7 @@ ir_swizzle::constant_expression_value(struct hash_table *variable_context)
 ir_constant *
 ir_dereference_variable::constant_expression_value(struct hash_table *variable_context)
 {
-   /* This may occur during compile and var->type is glsl_type::error_type */
-   if (!var)
-      return NULL;
+   assert(var);
 
    /* Give priority to the context hashtable, if it exists */
    if (variable_context) {
diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
index 3aa52dbd18a..277d4737ab7 100644
--- a/src/glsl/link_atomics.cpp
+++ b/src/glsl/link_atomics.cpp
@@ -83,16 +83,16 @@ namespace {
       const active_atomic_counter *const first = (active_atomic_counter *) a;
       const active_atomic_counter *const second = (active_atomic_counter *) b;
 
-      return int(first->var->data.atomic.offset) - int(second->var->data.atomic.offset);
+      return int(first->var->data.offset) - int(second->var->data.offset);
    }
 
    bool
    check_atomic_counters_overlap(const ir_variable *x, const ir_variable *y)
    {
-      return ((x->data.atomic.offset >= y->data.atomic.offset &&
-               x->data.atomic.offset < y->data.atomic.offset + y->type->atomic_size()) ||
-              (y->data.atomic.offset >= x->data.atomic.offset &&
-               y->data.atomic.offset < x->data.atomic.offset + x->type->atomic_size()));
+      return ((x->data.offset >= y->data.offset &&
+               x->data.offset < y->data.offset + y->type->atomic_size()) ||
+              (y->data.offset >= x->data.offset &&
+               y->data.offset < x->data.offset + x->type->atomic_size()));
    }
 
    void
@@ -158,7 +158,7 @@ namespace {
             ir_variable *var = node->as_variable();
 
             if (var && var->type->contains_atomic()) {
-               int offset = var->data.atomic.offset;
+               int offset = var->data.offset;
                unsigned uniform_loc = var->data.location;
                process_atomic_variable(var->type, prog, &uniform_loc,
                                        var, buffers, num_buffers, &offset, i);
@@ -185,7 +185,7 @@ namespace {
                linker_error(prog, "Atomic counter %s declared at offset %d "
                             "which is already in use.",
                             buffers[i].counters[j].var->name,
-                            buffers[i].counters[j].var->data.atomic.offset);
+                            buffers[i].counters[j].var->data.offset);
             }
          }
       }
@@ -237,7 +237,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
             var->data.binding = i;
 
          storage->atomic_buffer_index = i;
-         storage->offset = var->data.atomic.offset;
+         storage->offset = var->data.offset;
          storage->array_stride = (var->type->is_array() ?
                                   var->type->without_array()->atomic_size() : 0);
          if (!var->type->is_matrix())
diff --git a/src/glsl/link_uniform_block_active_visitor.cpp b/src/glsl/link_uniform_block_active_visitor.cpp
index 422739af063..54fea700b53 100644
--- a/src/glsl/link_uniform_block_active_visitor.cpp
+++ b/src/glsl/link_uniform_block_active_visitor.cpp
@@ -24,7 +24,7 @@
 #include "link_uniform_block_active_visitor.h"
 #include "program.h"
 
-link_uniform_block_active *
+static link_uniform_block_active *
 process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var)
 {
    const hash_entry *const existing_block =
@@ -92,7 +92,7 @@ process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var)
  * and not over complicating the code we will end up with a count of 8.
  * Here each dimension has 2 different indices counted so we end up with 2*2*2
  */
-struct uniform_block_array_elements **
+static struct uniform_block_array_elements **
 process_arrays(void *mem_ctx, ir_dereference_array *ir,
                struct link_uniform_block_active *block)
 {
diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
index d5d30bb0a0d..7d755765852 100644
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -266,7 +266,7 @@ process_block_array(struct uniform_block_array_elements *ub_array, char **name,
 /* This function resizes the array types of the block so that later we can use
  * this new size to correctly calculate the offest for indirect indexing.
  */
-const glsl_type *
+static const glsl_type *
 resize_block_array(const glsl_type *type,
                    struct uniform_block_array_elements *ub_array)
 {
diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index 9cc77feb78a..3853abdb8e6 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -41,6 +41,29 @@
 
 
 /**
+ * Get the varying type stripped of the outermost array if we're processing
+ * a stage whose varyings are arrays indexed by a vertex number (such as
+ * geometry shader inputs).
+ */
+static const glsl_type *
+get_varying_type(const ir_variable *var, gl_shader_stage stage)
+{
+   const glsl_type *type = var->type;
+
+   if (!var->data.patch &&
+       ((var->data.mode == ir_var_shader_out &&
+         stage == MESA_SHADER_TESS_CTRL) ||
+        (var->data.mode == ir_var_shader_in &&
+         (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
+          stage == MESA_SHADER_GEOMETRY)))) {
+      assert(type->is_array());
+      type = type->fields.array;
+   }
+
+   return type;
+}
+
+/**
  * Validate the types and qualifiers of an output from one stage against the
  * matching input to another stage.
  */
@@ -309,6 +332,41 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
    }
 }
 
+/**
+ * Demote shader inputs and outputs that are not used in other stages, and
+ * remove them via dead code elimination.
+ */
+void
+remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
+                                        gl_shader *sh,
+                                        enum ir_variable_mode mode)
+{
+   if (is_separate_shader_object)
+      return;
+
+   foreach_in_list(ir_instruction, node, sh->ir) {
+      ir_variable *const var = node->as_variable();
+
+      if ((var == NULL) || (var->data.mode != int(mode)))
+	 continue;
+
+      /* A shader 'in' or 'out' variable is only really an input or output if
+       * its value is used by other shader stages. This will cause the
+       * variable to have a location assigned.
+       */
+      if (var->data.is_unmatched_generic_inout) {
+         assert(var->data.mode != ir_var_temporary);
+	 var->data.mode = ir_var_auto;
+      }
+   }
+
+   /* Eliminate code that is now dead due to unused inputs/outputs being
+    * demoted.
+    */
+   while (do_dead_code(sh->ir, false))
+      ;
+
+}
 
 /**
  * Initialize this object based on a string that was passed to
@@ -767,7 +825,8 @@ public:
                    gl_shader_stage consumer_stage);
    ~varying_matches();
    void record(ir_variable *producer_var, ir_variable *consumer_var);
-   unsigned assign_locations(uint64_t reserved_slots, bool separate_shader);
+   unsigned assign_locations(struct gl_shader_program *prog,
+                             uint64_t reserved_slots, bool separate_shader);
    void store_locations() const;
 
 private:
@@ -946,32 +1005,14 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
    this->matches[this->num_matches].packing_order
       = this->compute_packing_order(var);
    if (this->disable_varying_packing) {
-      const struct glsl_type *type = var->type;
       unsigned slots;
+      gl_shader_stage stage =
+         (producer_var != NULL) ? producer_stage : consumer_stage;
 
-      /* Some shader stages have 2-dimensional varyings. Use the inner type. */
-      if (!var->data.patch &&
-          ((var == producer_var && producer_stage == MESA_SHADER_TESS_CTRL) ||
-           (var == consumer_var && (consumer_stage == MESA_SHADER_TESS_CTRL ||
-                                    consumer_stage == MESA_SHADER_TESS_EVAL ||
-                                    consumer_stage == MESA_SHADER_GEOMETRY)))) {
-         assert(type->is_array());
-         type = type->fields.array;
-      }
+      const glsl_type *type = get_varying_type(var, stage);
 
-      if (type->is_array()) {
-         slots = 1;
-         while (type->is_array()) {
-            slots *= type->length;
-            type = type->fields.array;
-         }
-         slots *= type->matrix_columns;
-      } else {
-         slots = type->matrix_columns;
-      }
-      if (type->without_array()->is_dual_slot_double())
-         slots *= 2;
-      this->matches[this->num_matches].num_components = 4 * slots;
+      slots = type->count_attribute_slots(false);
+      this->matches[this->num_matches].num_components = slots * 4;
    } else {
       this->matches[this->num_matches].num_components
          = var->type->component_slots();
@@ -991,7 +1032,9 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
  * passed to varying_matches::record().
  */
 unsigned
-varying_matches::assign_locations(uint64_t reserved_slots, bool separate_shader)
+varying_matches::assign_locations(struct gl_shader_program *prog,
+                                  uint64_t reserved_slots,
+                                  bool separate_shader)
 {
    /* We disable varying sorting for separate shader programs for the
     * following reasons:
@@ -1028,10 +1071,20 @@ varying_matches::assign_locations(uint64_t reserved_slots, bool separate_shader)
    for (unsigned i = 0; i < this->num_matches; i++) {
       unsigned *location = &generic_location;
 
-      if ((this->matches[i].consumer_var &&
-           this->matches[i].consumer_var->data.patch) ||
-          (this->matches[i].producer_var &&
-           this->matches[i].producer_var->data.patch))
+      const ir_variable *var;
+      const glsl_type *type;
+      bool is_vertex_input = false;
+      if (matches[i].consumer_var) {
+         var = matches[i].consumer_var;
+         type = get_varying_type(var, consumer_stage);
+         if (consumer_stage == MESA_SHADER_VERTEX)
+            is_vertex_input = true;
+      } else {
+         var = matches[i].producer_var;
+         type = get_varying_type(var, producer_stage);
+      }
+
+      if (var->data.patch)
          location = &generic_patch_location;
 
       /* Advance to the next slot if this varying has a different packing
@@ -1043,9 +1096,45 @@ varying_matches::assign_locations(uint64_t reserved_slots, bool separate_shader)
           != this->matches[i].packing_class) {
          *location = ALIGN(*location, 4);
       }
-      while ((*location < MAX_VARYING * 4u) &&
-            (reserved_slots & (1u << *location / 4u))) {
-         *location = ALIGN(*location + 1, 4);
+
+      unsigned num_elements =  type->count_attribute_slots(is_vertex_input);
+      unsigned slot_end = this->disable_varying_packing ? 4 :
+         type->without_array()->vector_elements;
+      slot_end += *location - 1;
+
+      /* FIXME: We could be smarter in the below code and loop back over
+       * trying to fill any locations that we skipped because we couldn't pack
+       * the varying between an explicit location. For now just let the user
+       * hit the linking error if we run out of room and suggest they use
+       * explicit locations.
+       */
+      for (unsigned j = 0; j < num_elements; j++) {
+         while ((slot_end < MAX_VARYING * 4u) &&
+                ((reserved_slots & (UINT64_C(1) << *location / 4u) ||
+                 (reserved_slots & (UINT64_C(1) << slot_end / 4u))))) {
+
+            *location = ALIGN(*location + 1, 4);
+            slot_end = *location;
+
+            /* reset the counter and try again */
+            j = 0;
+         }
+
+         /* Increase the slot to make sure there is enough room for next
+          * array element.
+          */
+         if (this->disable_varying_packing)
+            slot_end += 4;
+         else
+            slot_end += type->without_array()->vector_elements;
+      }
+
+      if (!var->data.patch && *location >= MAX_VARYING * 4u) {
+         linker_error(prog, "insufficient contiguous locations available for "
+                      "%s it is possible an array or struct could not be "
+                      "packed between varyings with explicit locations. Try "
+                      "using an explicit location for arrays and structs.",
+                      var->name);
       }
 
       this->matches[i].generic_location = *location;
@@ -1429,12 +1518,20 @@ reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode)
    foreach_in_list(ir_instruction, node, stage->ir) {
       ir_variable *const var = node->as_variable();
 
-      if (var == NULL || var->data.mode != io_mode || !var->data.explicit_location)
+      if (var == NULL || var->data.mode != io_mode ||
+          !var->data.explicit_location ||
+          var->data.location < VARYING_SLOT_VAR0)
          continue;
 
       var_slot = var->data.location - VARYING_SLOT_VAR0;
-      if (var_slot >= 0 && var_slot < MAX_VARYING)
-         slots |= 1u << var_slot;
+
+      unsigned num_elements = get_varying_type(var, stage->Stage)
+         ->count_attribute_slots(stage->Stage == MESA_SHADER_VERTEX);
+      for (unsigned i = 0; i < num_elements; i++) {
+         if (var_slot >= 0 && var_slot < MAX_VARYING)
+            slots |= UINT64_C(1) << var_slot;
+         var_slot += 1;
+      }
    }
 
    return slots;
@@ -1620,7 +1717,7 @@ assign_varying_locations(struct gl_context *ctx,
       reserved_varying_slot(producer, ir_var_shader_out) |
       reserved_varying_slot(consumer, ir_var_shader_in);
 
-   const unsigned slots_used = matches.assign_locations(reserved_slots,
+   const unsigned slots_used = matches.assign_locations(prog, reserved_slots,
                                                         prog->SeparateShader);
    matches.store_locations();
 
@@ -1640,17 +1737,6 @@ assign_varying_locations(struct gl_context *ctx,
    hash_table_dtor(consumer_inputs);
    hash_table_dtor(consumer_interface_inputs);
 
-   if (!disable_varying_packing) {
-      if (producer) {
-         lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
-                               0, producer);
-      }
-      if (consumer) {
-         lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
-                               consumer_vertices, consumer);
-      }
-   }
-
    if (consumer && producer) {
       foreach_in_list(ir_instruction, node, consumer->ir) {
          ir_variable *const var = node->as_variable();
@@ -1691,13 +1777,29 @@ assign_varying_locations(struct gl_context *ctx,
 			    var->name,
                             _mesa_shader_stage_to_string(producer->Stage));
             }
-
-            /* An 'in' variable is only really a shader input if its
-             * value is written by the previous stage.
-             */
-            var->data.mode = ir_var_auto;
          }
       }
+
+      /* Now that validation is done its safe to remove unused varyings. As
+       * we have both a producer and consumer its safe to remove unused
+       * varyings even if the program is a SSO because the stages are being
+       * linked together i.e. we have a multi-stage SSO.
+       */
+      remove_unused_shader_inputs_and_outputs(false, producer,
+                                              ir_var_shader_out);
+      remove_unused_shader_inputs_and_outputs(false, consumer,
+                                              ir_var_shader_in);
+   }
+
+   if (!disable_varying_packing) {
+      if (producer) {
+         lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
+                               0, producer);
+      }
+      if (consumer) {
+         lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
+                               consumer_vertices, consumer);
+      }
    }
 
    return true;
diff --git a/src/glsl/link_varyings.h b/src/glsl/link_varyings.h
index 1d12978fa30..b2812614ecc 100644
--- a/src/glsl/link_varyings.h
+++ b/src/glsl/link_varyings.h
@@ -268,6 +268,11 @@ parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
                       const void *mem_ctx, unsigned num_names,
                       char **varying_names, tfeedback_decl *decls);
 
+void
+remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
+                                        gl_shader *sh,
+                                        enum ir_variable_mode mode);
+
 bool
 store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
                      unsigned num_tfeedback_decls,
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index c7e69765335..418bd09e49e 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1014,7 +1014,7 @@ cross_validate_globals(struct gl_shader_program *prog,
             }
 
             if (var->type->contains_atomic() &&
-                var->data.atomic.offset != existing->data.atomic.offset) {
+                var->data.offset != existing->data.offset) {
                linker_error(prog, "offset specifications for %s "
                             "`%s' have differing values\n",
                             mode_string(var), var->name);
@@ -2723,30 +2723,6 @@ match_explicit_outputs_to_inputs(struct gl_shader_program *prog,
 }
 
 /**
- * Demote shader inputs and outputs that are not used in other stages
- */
-void
-demote_shader_inputs_and_outputs(gl_shader *sh, enum ir_variable_mode mode)
-{
-   foreach_in_list(ir_instruction, node, sh->ir) {
-      ir_variable *const var = node->as_variable();
-
-      if ((var == NULL) || (var->data.mode != int(mode)))
-	 continue;
-
-      /* A shader 'in' or 'out' variable is only really an input or output if
-       * its value is used by other shader stages.  This will cause the variable
-       * to have a location assigned.
-       */
-      if (var->data.is_unmatched_generic_inout) {
-         assert(var->data.mode != ir_var_temporary);
-	 var->data.mode = ir_var_auto;
-      }
-   }
-}
-
-
-/**
  * Store the gl_FragDepth layout in the gl_shader_program struct.
  */
 static void
@@ -3433,7 +3409,7 @@ add_interface_variables(struct gl_shader_program *shProg,
 }
 
 static bool
-add_packed_varyings(struct gl_shader_program *shProg, int stage)
+add_packed_varyings(struct gl_shader_program *shProg, int stage, GLenum type)
 {
    struct gl_shader *sh = shProg->_LinkedShaders[stage];
    GLenum iface;
@@ -3454,10 +3430,13 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage)
          default:
             unreachable("unexpected type");
          }
-         if (!add_program_resource(shProg, iface, var,
-                                   build_stageref(shProg, var->name,
-                                                  var->data.mode)))
-            return false;
+
+         if (type == iface) {
+            if (!add_program_resource(shProg, iface, var,
+                                      build_stageref(shProg, var->name,
+                                                     var->data.mode)))
+               return false;
+         }
       }
    }
    return true;
@@ -3724,9 +3703,9 @@ build_program_resource_list(struct gl_shader_program *shProg)
 
    /* Program interface needs to expose varyings in case of SSO. */
    if (shProg->SeparateShader) {
-      if (!add_packed_varyings(shProg, input_stage))
+      if (!add_packed_varyings(shProg, input_stage, GL_PROGRAM_INPUT))
          return;
-      if (!add_packed_varyings(shProg, output_stage))
+      if (!add_packed_varyings(shProg, output_stage, GL_PROGRAM_OUTPUT))
          return;
    }
 
@@ -3942,8 +3921,10 @@ split_ubos_and_ssbos(void *mem_ctx,
                      unsigned num_blocks,
                      struct gl_uniform_block ***ubos,
                      unsigned *num_ubos,
+                     unsigned **ubo_interface_block_indices,
                      struct gl_uniform_block ***ssbos,
-                     unsigned *num_ssbos)
+                     unsigned *num_ssbos,
+                     unsigned **ssbo_interface_block_indices)
 {
    unsigned num_ubo_blocks = 0;
    unsigned num_ssbo_blocks = 0;
@@ -3961,11 +3942,25 @@ split_ubos_and_ssbos(void *mem_ctx,
    *ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks);
    *num_ssbos = 0;
 
+   if (ubo_interface_block_indices)
+      *ubo_interface_block_indices =
+         ralloc_array(mem_ctx, unsigned, num_ubo_blocks);
+
+   if (ssbo_interface_block_indices)
+      *ssbo_interface_block_indices =
+         ralloc_array(mem_ctx, unsigned, num_ssbo_blocks);
+
    for (unsigned i = 0; i < num_blocks; i++) {
       if (blocks[i].IsShaderStorage) {
-         (*ssbos)[(*num_ssbos)++] = &blocks[i];
+         (*ssbos)[*num_ssbos] = &blocks[i];
+         if (ssbo_interface_block_indices)
+            (*ssbo_interface_block_indices)[*num_ssbos] = i;
+         (*num_ssbos)++;
       } else {
-         (*ubos)[(*num_ubos)++] = &blocks[i];
+         (*ubos)[*num_ubos] = &blocks[i];
+         if (ubo_interface_block_indices)
+            (*ubo_interface_block_indices)[*num_ubos] = i;
+         (*num_ubos)++;
       }
    }
 
@@ -4443,14 +4438,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
       do_dead_builtin_varyings(ctx, sh, NULL,
                                num_tfeedback_decls, tfeedback_decls);
 
-      if (!prog->SeparateShader) {
-         demote_shader_inputs_and_outputs(sh, ir_var_shader_out);
-         /* Eliminate code that is now dead due to unused outputs being
-          * demoted.
-          */
-         while (do_dead_code(sh->ir, false))
-            ;
-      }
+      remove_unused_shader_inputs_and_outputs(prog->SeparateShader, sh,
+                                              ir_var_shader_out);
    }
    else if (first == MESA_SHADER_FRAGMENT) {
       /* If the program only contains a fragment shader...
@@ -4468,12 +4457,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
                                        NULL /* tfeedback_decls */))
             goto done;
       } else {
-         demote_shader_inputs_and_outputs(sh, ir_var_shader_in);
-         /* Eliminate code that is now dead due to unused inputs being
-          * demoted.
-          */
-         while (do_dead_code(sh->ir, false))
-            ;
+         remove_unused_shader_inputs_and_outputs(false, sh,
+                                                 ir_var_shader_in);
       }
    }
 
@@ -4494,16 +4479,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
                 next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
                 tfeedback_decls);
 
-      demote_shader_inputs_and_outputs(sh_i, ir_var_shader_out);
-      demote_shader_inputs_and_outputs(sh_next, ir_var_shader_in);
-
-      /* Eliminate code that is now dead due to unused outputs being demoted.
-       */
-      while (do_dead_code(sh_i->ir, false))
-         ;
-      while (do_dead_code(sh_next->ir, false))
-         ;
-
       /* This must be done after all dead varyings are eliminated. */
       if (!check_against_output_limit(ctx, prog, sh_i))
          goto done;
@@ -4577,8 +4552,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
                               sh->NumBufferInterfaceBlocks,
                               &sh->UniformBlocks,
                               &sh->NumUniformBlocks,
+                              NULL,
                               &sh->ShaderStorageBlocks,
-                              &sh->NumShaderStorageBlocks);
+                              &sh->NumShaderStorageBlocks,
+                              NULL);
       }
    }
 
@@ -4587,8 +4564,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
                         prog->NumBufferInterfaceBlocks,
                         &prog->UniformBlocks,
                         &prog->NumUniformBlocks,
+                        &prog->UboInterfaceBlockIndex,
                         &prog->ShaderStorageBlocks,
-                        &prog->NumShaderStorageBlocks);
+                        &prog->NumShaderStorageBlocks,
+                        &prog->SsboInterfaceBlockIndex);
 
    /* FINISHME: Assign fragment shader output locations. */
 
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 3cc42d229a0..e9035b4e1ba 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -402,7 +402,7 @@ nir_visitor::visit(ir_variable *ir)
    var->data.index = ir->data.index;
    var->data.descriptor_set = 0;
    var->data.binding = ir->data.binding;
-   var->data.atomic.offset = ir->data.atomic.offset;
+   var->data.offset = ir->data.offset;
    var->data.image.read_only = ir->data.image_read_only;
    var->data.image.write_only = ir->data.image_write_only;
    var->data.image.coherent = ir->data.image_coherent;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 61e51da5867..a2c642ec6fc 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -296,9 +296,7 @@ typedef struct {
       /**
        * Location an atomic counter is stored at.
        */
-      struct {
-         unsigned offset;
-      } atomic;
+      unsigned offset;
 
       /**
        * ARB_shader_image_load_store qualifiers.
@@ -1503,8 +1501,8 @@ typedef struct nir_function {
 } nir_function;
 
 typedef struct nir_shader_compiler_options {
-   bool lower_ffma;
    bool lower_fdiv;
+   bool lower_ffma;
    bool lower_flrp;
    bool lower_fpow;
    bool lower_fsat;
@@ -2004,12 +2002,46 @@ nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
 void nir_validate_shader(nir_shader *shader);
 void nir_metadata_set_validation_flag(nir_shader *shader);
 void nir_metadata_check_validation_flag(nir_shader *shader);
+
+#include "util/debug.h"
+static inline bool
+should_clone_nir(void)
+{
+   static int should_clone = -1;
+   if (should_clone < 0)
+      should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
+
+   return should_clone;
+}
 #else
 static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
 static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
 static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
+static inline bool should_clone_nir(void) { return false; }
 #endif /* DEBUG */
 
+#define _PASS(nir, do_pass) do {                                     \
+   do_pass                                                           \
+   nir_validate_shader(nir);                                         \
+   if (should_clone_nir()) {                                         \
+      nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
+      ralloc_free(nir);                                              \
+      nir = clone;                                                   \
+   }                                                                 \
+} while (0)
+
+#define NIR_PASS(progress, nir, pass, ...) _PASS(nir,                \
+   nir_metadata_set_validation_flag(nir);                            \
+   if (pass(nir, ##__VA_ARGS__)) {                                   \
+      progress = true;                                               \
+      nir_metadata_check_validation_flag(nir);                       \
+   }                                                                 \
+)
+
+#define NIR_PASS_V(nir, pass, ...) _PASS(nir,                        \
+   pass(nir, ##__VA_ARGS__);                                         \
+)
+
 void nir_calc_dominance_impl(nir_function_impl *impl);
 void nir_calc_dominance(nir_shader *shader);
 
diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c
index 259c154149b..1aa78e18a85 100644
--- a/src/glsl/nir/nir_lower_atomics.c
+++ b/src/glsl/nir/nir_lower_atomics.c
@@ -74,7 +74,7 @@ lower_instr(nir_intrinsic_instr *instr,
       state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
 
    nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
-   offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;
+   offset_const->value.u[0] = instr->variables[0]->var->data.offset;
 
    nir_instr_insert_before(&instr->instr, &offset_const->instr);
 
diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c
index f84a02410a8..0ca6a289396 100644
--- a/src/glsl/nir/nir_lower_clip.c
+++ b/src/glsl/nir/nir_lower_clip.c
@@ -72,6 +72,7 @@ store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val)
    store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
    store->num_components = 4;
    store->const_index[0] = out->data.driver_location;
+   store->const_index[1] = 0xf;   /* wrmask */
    store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]);
    store->src[0].is_ssa = true;
    store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index 4bc6d16cbad..d31507fe531 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -361,12 +361,12 @@ binop("udiv", tuint, "", "src0 / src1")
 # returns a boolean representing the carry resulting from the addition of
 # the two unsigned arguments.
 
-binop_convert("uadd_carry", tbool, tuint, commutative, "src0 + src1 < src0")
+binop_convert("uadd_carry", tuint, tuint, commutative, "src0 + src1 < src0")
 
 # returns a boolean representing the borrow resulting from the subtraction
 # of the two unsigned arguments.
 
-binop_convert("usub_borrow", tbool, tuint, "", "src0 < src1")
+binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1")
 
 binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
 binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1")
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index f4a863239b6..db18fc9619b 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -184,6 +184,7 @@ optimizations = [
    (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
    # Division and reciprocal
    (('fdiv', 1.0, a), ('frcp', a)),
+   (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
    (('frcp', ('frcp', a)), a),
    (('frcp', ('fsqrt', a)), ('frsq', a)),
    (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
@@ -226,8 +227,8 @@ optimizations = [
    # Misc. lowering
    (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'),
    (('bitfield_insert', a, b, c, d), ('bfi', ('bfm', d, c), b, a), 'options->lower_bitfield_insert'),
-   (('uadd_carry', a, b), ('ult', ('iadd', a, b), a), 'options->lower_uadd_carry'),
-   (('usub_borrow', a, b), ('ult', a, b), 'options->lower_usub_borrow'),
+   (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'),
+   (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'),
 ]
 
 # Add optimizations to handle the case where the result of a ternary is
diff --git a/src/glsl/nir/shader_enums.c b/src/glsl/nir/shader_enums.c
index f1dbe0158ab..1410a504484 100644
--- a/src/glsl/nir/shader_enums.c
+++ b/src/glsl/nir/shader_enums.c
@@ -47,6 +47,42 @@ const char * gl_shader_stage_name(gl_shader_stage stage)
    return NAME(stage);
 }
 
+/**
+ * Translate a gl_shader_stage to a short shader stage name for debug
+ * printouts and error messages.
+ */
+const char * _mesa_shader_stage_to_string(unsigned stage)
+{
+   switch (stage) {
+   case MESA_SHADER_VERTEX:   return "vertex";
+   case MESA_SHADER_FRAGMENT: return "fragment";
+   case MESA_SHADER_GEOMETRY: return "geometry";
+   case MESA_SHADER_COMPUTE:  return "compute";
+   case MESA_SHADER_TESS_CTRL: return "tess ctrl";
+   case MESA_SHADER_TESS_EVAL: return "tess eval";
+   }
+
+   unreachable("Unknown shader stage.");
+}
+
+/**
+ * Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS)
+ * for debug printouts and error messages.
+ */
+const char * _mesa_shader_stage_to_abbrev(unsigned stage)
+{
+   switch (stage) {
+   case MESA_SHADER_VERTEX:   return "VS";
+   case MESA_SHADER_FRAGMENT: return "FS";
+   case MESA_SHADER_GEOMETRY: return "GS";
+   case MESA_SHADER_COMPUTE:  return "CS";
+   case MESA_SHADER_TESS_CTRL: return "TCS";
+   case MESA_SHADER_TESS_EVAL: return "TES";
+   }
+
+   unreachable("Unknown shader stage.");
+}
+
 const char * gl_vert_attrib_name(gl_vert_attrib attrib)
 {
    static const char *names[] = {
diff --git a/src/glsl/nir/shader_enums.h b/src/glsl/nir/shader_enums.h
index dfab0bb9952..bc6ea3844b6 100644
--- a/src/glsl/nir/shader_enums.h
+++ b/src/glsl/nir/shader_enums.h
@@ -26,6 +26,10 @@
 #ifndef SHADER_ENUMS_H
 #define SHADER_ENUMS_H
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Shader stages. Note that these will become 5 with tessellation.
  *
@@ -45,6 +49,18 @@ typedef enum
 
 const char * gl_shader_stage_name(gl_shader_stage stage);
 
+/**
+ * Translate a gl_shader_stage to a short shader stage name for debug
+ * printouts and error messages.
+ */
+const char * _mesa_shader_stage_to_string(unsigned stage);
+
+/**
+ * Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS)
+ * for debug printouts and error messages.
+ */
+const char * _mesa_shader_stage_to_abbrev(unsigned stage);
+
 #define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
 
 
@@ -412,7 +428,8 @@ typedef enum
     * \name Fragment shader system values
     */
    /*@{*/
-   SYSTEM_VALUE_FRONT_FACE,     /**< (not done yet) */
+   SYSTEM_VALUE_FRAG_COORD,
+   SYSTEM_VALUE_FRONT_FACE,
    SYSTEM_VALUE_SAMPLE_ID,
    SYSTEM_VALUE_SAMPLE_POS,
    SYSTEM_VALUE_SAMPLE_MASK_IN,
@@ -520,4 +537,8 @@ enum gl_frag_depth_layout
    FRAG_DEPTH_LAYOUT_UNCHANGED
 };
 
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
 #endif /* SHADER_ENUMS_H */
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
index 5ce804eed20..0f7a16a5e6f 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -130,6 +130,11 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
       shProg->InterfaceBlockStageIndex[i] = NULL;
    }
 
+   ralloc_free(shProg->UboInterfaceBlockIndex);
+   shProg->UboInterfaceBlockIndex = NULL;
+   ralloc_free(shProg->SsboInterfaceBlockIndex);
+   shProg->SsboInterfaceBlockIndex = NULL;
+
    ralloc_free(shProg->AtomicBuffers);
    shProg->AtomicBuffers = NULL;
    shProg->NumAtomicBuffers = 0;