Diffstat (limited to 'src/compiler')
-rw-r--r--   src/compiler/Makefile.sources             |   1
-rw-r--r--   src/compiler/nir/meson.build              |   1
-rw-r--r--   src/compiler/nir/nir.h                    |   2
-rw-r--r--   src/compiler/nir/nir_opt_combine_stores.c | 401
-rw-r--r--   src/compiler/nir/tests/vars_tests.cpp     | 174
5 files changed, 579 insertions, 0 deletions
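
For context: the new pass tracks consecutive partial store_deref intrinsics that hit the same destination within a block and folds them into the final store of the sequence. Drivers reach it through the nir_opt_combine_stores() prototype added to nir.h below. A rough sketch of how it might be wired into a typical NIR optimization loop follows; the surrounding passes and the mode mask are illustrative only and not part of this commit:

/* Hypothetical driver hook, not part of this commit. */
#include "nir.h"

static void
example_optimize_nir(nir_shader *s)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_dce);
      /* Fold back-to-back partial stores to outputs, temporaries and SSBOs. */
      NIR_PASS(progress, s, nir_opt_combine_stores,
               nir_var_shader_out | nir_var_function_temp | nir_var_mem_ssbo);
   } while (progress);
}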
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 7154fd1bc7d..5436584abdd 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -273,6 +273,7 @@ NIR_FILES = \
 	nir/nir_move_load_const.c \
 	nir/nir_move_vec_src_uses_to_dest.c \
 	nir/nir_normalize_cubemap_coords.c \
+	nir/nir_opt_combine_stores.c \
 	nir/nir_opt_conditional_discard.c \
 	nir/nir_opt_constant_folding.c \
 	nir/nir_opt_copy_prop_vars.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 514f5ceaa56..b46bf8f2ebf 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -156,6 +156,7 @@ files_libnir = files(
   'nir_move_load_const.c',
   'nir_move_vec_src_uses_to_dest.c',
   'nir_normalize_cubemap_coords.c',
+  'nir_opt_combine_stores.c',
   'nir_opt_conditional_discard.c',
   'nir_opt_constant_folding.c',
   'nir_opt_copy_prop_vars.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 22831cf3cc1..98adf1b5331 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3329,6 +3329,8 @@ bool nir_opt_algebraic_before_ffma(nir_shader *shader);
 bool nir_opt_algebraic_late(nir_shader *shader);
 bool nir_opt_constant_folding(nir_shader *shader);
 
+bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);
+
 bool nir_opt_global_to_local(nir_shader *shader);
 
 bool nir_copy_prop(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_combine_stores.c b/src/compiler/nir/nir_opt_combine_stores.c
new file mode 100644
index 00000000000..48b9cfa1501
--- /dev/null
+++ b/src/compiler/nir/nir_opt_combine_stores.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+
+#include "util/bitscan.h"
+#include "util/list.h"
+#include "util/u_math.h"
+
+/* Combine stores of vectors to the same deref into a single store.
+ *
+ * This per-block pass keeps track of stores of vectors to the same
+ * destination and combines them into the last store of the sequence.  Dead
+ * stores (or parts of a store) found during the process are removed.
+ *
+ * A pending combination becomes an actual combination in various situations:
+ * at the end of the block, when another instruction uses the memory, or due
+ * to barriers.
+ *
+ * Besides vectors, the pass also looks at array derefs of vectors.
+ * For direct array derefs, it works like a write mask access to the given
+ * component.  For indirect access there's no way to know beforehand which
+ * component it will overlap with, so the combination is finished -- the
+ * indirect store remains unmodified.
+ */
+
+/* Keep track of a group of stores that can be combined.  All stores share
+ * the same destination.
+ */
+struct combined_store {
+   struct list_head link;
+
+   nir_component_mask_t write_mask;
+   nir_deref_instr *dst;
+
+   /* Latest store added.  It is reused when combining. */
+   nir_intrinsic_instr *latest;
+
+   /* Original store for each component.  The number of times a store appears
+    * in this array is kept in the store's pass_flags.
+    */
+   nir_intrinsic_instr *stores[NIR_MAX_VEC_COMPONENTS];
+};
+
+struct combine_stores_state {
+   nir_variable_mode modes;
+
+   /* Pending store combinations. */
+   struct list_head pending;
+
+   /* Per function impl state. */
+   nir_builder b;
+   bool progress;
+
+   /* Allocator and freelist to reuse structs between functions. */
+   void *lin_ctx;
+   struct list_head freelist;
+};
+
+static struct combined_store *
+alloc_combined_store(struct combine_stores_state *state)
+{
+   struct combined_store *result;
+   if (list_empty(&state->freelist)) {
+      result = linear_zalloc_child(state->lin_ctx, sizeof(*result));
+   } else {
+      result = list_first_entry(&state->freelist,
+                                struct combined_store,
+                                link);
+      list_del(&result->link);
+      memset(result, 0, sizeof(*result));
+   }
+   return result;
+}
+
+static void
+free_combined_store(struct combine_stores_state *state,
+                    struct combined_store *combo)
+{
+   list_del(&combo->link);
+   combo->write_mask = 0;
+   list_add(&combo->link, &state->freelist);
+}
+
+static void
+combine_stores(struct combine_stores_state *state,
+               struct combined_store *combo)
+{
+   assert(combo->latest);
+   assert(combo->latest->intrinsic == nir_intrinsic_store_deref);
+
+   /* If the combined writemask is the same as the latest store, we know there
+    * is only one store in the combination, so nothing to combine.
+    */
+   if ((combo->write_mask & nir_intrinsic_write_mask(combo->latest)) ==
+       combo->write_mask)
+      return;
+
+   state->b.cursor = nir_before_instr(&combo->latest->instr);
+
+   /* Build a new vec, to be used as source for the combined store.  As it
+    * gets built, remove previous stores that are not needed anymore.
+    */
+   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = {0};
+   unsigned num_components = glsl_get_vector_elements(combo->dst->type);
+   unsigned bit_size = combo->latest->src[1].ssa->bit_size;
+   for (unsigned i = 0; i < num_components; i++) {
+      nir_intrinsic_instr *store = combo->stores[i];
+      if (combo->write_mask & (1 << i)) {
+         assert(store);
+         assert(store->src[1].is_ssa);
+
+         /* If store->num_components == 1 then we are in the deref-of-vec case
+          * and store->src[1] is a scalar.  Otherwise, this is a regular vector
+          * store and we have to pick off a component.
+          */
+         comps[i] = store->num_components == 1 ?
+                       store->src[1].ssa :
+                       nir_channel(&state->b, store->src[1].ssa, i);
+
+         assert(store->instr.pass_flags > 0);
+         if (--store->instr.pass_flags == 0 && store != combo->latest)
+            nir_instr_remove(&store->instr);
+      } else {
+         comps[i] = nir_ssa_undef(&state->b, 1, bit_size);
+      }
+   }
+   assert(combo->latest->instr.pass_flags == 0);
+   nir_ssa_def *vec = nir_vec(&state->b, comps, num_components);
+
+   /* Fix the latest store with the combined information. */
+   nir_intrinsic_instr *store = combo->latest;
+
+   /* In this case, our store is an array deref of a vector, so we need to
+    * rewrite it to use a deref to the whole vector.
+    */
+   if (store->num_components == 1) {
+      store->num_components = num_components;
+      nir_instr_rewrite_src(&store->instr, &store->src[0],
+                            nir_src_for_ssa(&combo->dst->dest.ssa));
+   }
+
+   assert(store->num_components == num_components);
+   nir_intrinsic_set_write_mask(store, combo->write_mask);
+   nir_instr_rewrite_src(&store->instr, &store->src[1],
+                         nir_src_for_ssa(vec));
+   state->progress = true;
+}
+
+static void
+combine_stores_with_deref(struct combine_stores_state *state,
+                          nir_deref_instr *deref)
+{
+   if ((state->modes & deref->mode) == 0)
+      return;
+
+   list_for_each_entry_safe(struct combined_store, combo, &state->pending, link) {
+      if (nir_compare_derefs(combo->dst, deref) & nir_derefs_may_alias_bit) {
+         combine_stores(state, combo);
+         free_combined_store(state, combo);
+      }
+   }
+}
+
+static void
+combine_stores_with_modes(struct combine_stores_state *state,
+                          nir_variable_mode modes)
+{
+   if ((state->modes & modes) == 0)
+      return;
+
+   list_for_each_entry_safe(struct combined_store, combo, &state->pending, link) {
+      if (combo->dst->mode & modes) {
+         combine_stores(state, combo);
+         free_combined_store(state, combo);
+      }
+   }
+}
+
+static struct combined_store *
+find_matching_combined_store(struct combine_stores_state *state,
+                             nir_deref_instr *deref)
+{
+   list_for_each_entry(struct combined_store, combo, &state->pending, link) {
+      if (nir_compare_derefs(combo->dst, deref) & nir_derefs_equal_bit)
+         return combo;
+   }
+   return NULL;
+}
+
+static void
+update_combined_store(struct combine_stores_state *state,
+                      nir_intrinsic_instr *intrin)
+{
+   nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+   if ((dst->mode & state->modes) == 0)
+      return;
+
+   unsigned vec_mask;
+   nir_deref_instr *vec_dst;
+
+   if (glsl_type_is_vector(dst->type)) {
+      vec_mask = nir_intrinsic_write_mask(intrin);
+      vec_dst = dst;
+   } else {
+      /* Besides vectors, only direct array derefs of vectors are handled. */
+      if (dst->deref_type != nir_deref_type_array ||
+          !nir_src_is_const(dst->arr.index) ||
+          !glsl_type_is_vector(nir_deref_instr_parent(dst)->type)) {
+         combine_stores_with_deref(state, dst);
+         return;
+      }
+
+      uint64_t index = nir_src_as_uint(dst->arr.index);
+      vec_dst = nir_deref_instr_parent(dst);
+
+      if (index >= glsl_get_vector_elements(vec_dst->type)) {
+         /* Storing to an invalid index is a no-op. */
+         nir_instr_remove(&intrin->instr);
+         state->progress = true;
+         return;
+      }
+
+      vec_mask = 1 << index;
+   }
+
+   struct combined_store *combo = find_matching_combined_store(state, vec_dst);
+   if (!combo) {
+      combo = alloc_combined_store(state);
+      combo->dst = vec_dst;
+      list_add(&combo->link, &state->pending);
+   }
+
+   /* Use pass_flags to reference count the store based on how many
+    * components are still used by the combination.
+    */
+   intrin->instr.pass_flags = util_bitcount(vec_mask);
+   combo->latest = intrin;
+
+   /* Update the combined_store, clearing up older overlapping references. */
+   combo->write_mask |= vec_mask;
+   while (vec_mask) {
+      unsigned i = u_bit_scan(&vec_mask);
+      nir_intrinsic_instr *prev_store = combo->stores[i];
+
+      if (prev_store) {
+         if (--prev_store->instr.pass_flags == 0) {
+            nir_instr_remove(&prev_store->instr);
+         } else {
+            assert(glsl_type_is_vector(
+                      nir_src_as_deref(prev_store->src[0])->type));
+            nir_component_mask_t prev_mask = nir_intrinsic_write_mask(prev_store);
+            nir_intrinsic_set_write_mask(prev_store, prev_mask & ~(1 << i));
+         }
+         state->progress = true;
+      }
+      combo->stores[i] = combo->latest;
+   }
+}
+
+static void
+combine_stores_block(struct combine_stores_state *state, nir_block *block)
+{
+   nir_foreach_instr_safe(instr, block) {
+      if (instr->type == nir_instr_type_call) {
+         combine_stores_with_modes(state, nir_var_shader_out |
+                                          nir_var_shader_temp |
+                                          nir_var_function_temp |
+                                          nir_var_mem_ssbo |
+                                          nir_var_mem_shared);
+         continue;
+      }
+
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_store_deref:
+         update_combined_store(state, intrin);
+         break;
+
+      case nir_intrinsic_barrier:
+      case nir_intrinsic_group_memory_barrier:
+      case nir_intrinsic_memory_barrier:
+      case nir_intrinsic_memory_barrier_atomic_counter:
+      case nir_intrinsic_memory_barrier_buffer:
+      case nir_intrinsic_memory_barrier_image:
+      case nir_intrinsic_memory_barrier_shared:
+         /* TODO: Be more granular depending on the barrier. */
+         combine_stores_with_modes(state, nir_var_shader_out |
+                                          nir_var_mem_ssbo |
+                                          nir_var_mem_shared);
+         break;
+
+      case nir_intrinsic_emit_vertex:
+      case nir_intrinsic_emit_vertex_with_counter:
+         combine_stores_with_modes(state, nir_var_shader_out);
+         break;
+
+      case nir_intrinsic_load_deref: {
+         nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
+         combine_stores_with_deref(state, src);
+         break;
+      }
+
+      case nir_intrinsic_copy_deref: {
+         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+         nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
+         combine_stores_with_deref(state, dst);
+         combine_stores_with_deref(state, src);
+         break;
+      }
+
+      case nir_intrinsic_deref_atomic_add:
+      case nir_intrinsic_deref_atomic_imin:
+      case nir_intrinsic_deref_atomic_umin:
+      case nir_intrinsic_deref_atomic_imax:
+      case nir_intrinsic_deref_atomic_umax:
+      case nir_intrinsic_deref_atomic_and:
+      case nir_intrinsic_deref_atomic_or:
+      case nir_intrinsic_deref_atomic_xor:
+      case nir_intrinsic_deref_atomic_exchange:
+      case nir_intrinsic_deref_atomic_comp_swap: {
+         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+         combine_stores_with_deref(state, dst);
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+
+   /* At the end of the block, try all the remaining combinations. */
+   combine_stores_with_modes(state, state->modes);
+}
+
+static bool
+combine_stores_impl(struct combine_stores_state *state, nir_function_impl *impl)
+{
+   state->progress = false;
+   nir_builder_init(&state->b, impl);
+
+   nir_foreach_block(block, impl)
+      combine_stores_block(state, block);
+
+   if (state->progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
+   return state->progress;
+}
+
+bool
+nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes)
+{
+   void *mem_ctx = ralloc_context(NULL);
+   struct combine_stores_state state = {
+      .modes = modes,
+      .lin_ctx = linear_zalloc_parent(mem_ctx, 0),
+   };
+
+   list_inithead(&state.pending);
+   list_inithead(&state.freelist);
+
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+      progress |= combine_stores_impl(&state, function->impl);
+   }
+
+   ralloc_free(mem_ctx);
+   return progress;
+}
diff --git a/src/compiler/nir/tests/vars_tests.cpp b/src/compiler/nir/tests/vars_tests.cpp
index f72b6b99f62..9e0b5d2c0df 100644
--- a/src/compiler/nir/tests/vars_tests.cpp
+++ b/src/compiler/nir/tests/vars_tests.cpp
@@ -49,6 +49,10 @@ protected:
       return create_var(mode, glsl_vector_type(GLSL_TYPE_INT, 2), name);
    }
 
+   nir_variable *create_ivec4(nir_variable_mode mode, const char *name) {
+      return create_var(mode, glsl_vector_type(GLSL_TYPE_INT, 4), name);
+   }
+
    nir_variable **create_many_int(nir_variable_mode mode, const char *prefix, unsigned count) {
       nir_variable **result = (nir_variable **)linear_alloc_child(lin_ctx, sizeof(nir_variable *) * count);
       for (unsigned i = 0; i < count; i++)
@@ -63,6 +67,13 @@ protected:
       return result;
    }
 
+   nir_variable **create_many_ivec4(nir_variable_mode mode, const char *prefix, unsigned count) {
+      nir_variable **result = (nir_variable **)linear_alloc_child(lin_ctx, sizeof(nir_variable *) * count);
+      for (unsigned i = 0; i < count; i++)
+         result[i] = create_ivec4(mode, linear_asprintf(lin_ctx, "%s%u", prefix, i));
+      return result;
+   }
+
    unsigned count_intrinsics(nir_intrinsic_op intrinsic);
 
    nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
@@ -132,6 +143,7 @@ nir_vars_test::get_intrinsic(nir_intrinsic_op intrinsic,
 class nir_redundant_load_vars_test : public nir_vars_test {};
 class nir_copy_prop_vars_test : public nir_vars_test {};
 class nir_dead_write_vars_test : public nir_vars_test {};
+class nir_combine_stores_test : public nir_vars_test {};
 
 } // namespace
 
@@ -953,3 +965,165 @@ TEST_F(nir_dead_write_vars_test, DISABLED_unrelated_barrier_in_two_blocks)
    nir_intrinsic_instr *third_store = get_intrinsic(nir_intrinsic_store_deref, 2);
    EXPECT_EQ(nir_intrinsic_get_var(third_store, 0), v[0]);
 }
+
+TEST_F(nir_combine_stores_test, non_overlapping_stores)
+{
+   nir_variable **v = create_many_ivec4(nir_var_mem_ssbo, "v", 4);
+   nir_variable *out = create_ivec4(nir_var_shader_out, "out");
+
+   for (int i = 0; i < 4; i++)
+      nir_store_var(b, out, nir_load_var(b, v[i]), 1 << i);
+
+   nir_validate_shader(b->shader, NULL);
+
+   bool progress = nir_opt_combine_stores(b->shader, nir_var_shader_out);
+   ASSERT_TRUE(progress);
+
+   nir_validate_shader(b->shader, NULL);
+
+   /* Clean up to verify from where the values in the combined store are coming. */
+   nir_copy_prop(b->shader);
+   nir_opt_dce(b->shader);
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+   nir_intrinsic_instr *combined = get_intrinsic(nir_intrinsic_store_deref, 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(combined), 0xf);
+   ASSERT_EQ(nir_intrinsic_get_var(combined, 0), out);
+
+   nir_alu_instr *vec = nir_src_as_alu_instr(&combined->src[1]);
+   ASSERT_TRUE(vec);
+   for (int i = 0; i < 4; i++) {
+      nir_intrinsic_instr *load =
+         nir_instr_as_intrinsic(nir_src_instr(&vec->src[i].src));
+      ASSERT_EQ(load->intrinsic, nir_intrinsic_load_deref);
+      ASSERT_EQ(nir_intrinsic_get_var(load, 0), v[i])
+         << "Source value for component " << i << " of store is wrong";
+      ASSERT_EQ(vec->src[i].swizzle[0], i)
+         << "Source component for component " << i << " of store is wrong";
+   }
+}
+
+TEST_F(nir_combine_stores_test, overlapping_stores)
+{
+   nir_variable **v = create_many_ivec4(nir_var_mem_ssbo, "v", 3);
+   nir_variable *out = create_ivec4(nir_var_shader_out, "out");
+
+   /* Make stores with xy, yz and zw masks. */
+   for (int i = 0; i < 3; i++) {
+      nir_component_mask_t mask = (1 << i) | (1 << (i + 1));
+      nir_store_var(b, out, nir_load_var(b, v[i]), mask);
+   }
+
+   nir_validate_shader(b->shader, NULL);
+
+   bool progress = nir_opt_combine_stores(b->shader, nir_var_shader_out);
+   ASSERT_TRUE(progress);
+
+   nir_validate_shader(b->shader, NULL);
+
+   /* Clean up to verify from where the values in the combined store are coming. */
+   nir_copy_prop(b->shader);
+   nir_opt_dce(b->shader);
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+   nir_intrinsic_instr *combined = get_intrinsic(nir_intrinsic_store_deref, 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(combined), 0xf);
+   ASSERT_EQ(nir_intrinsic_get_var(combined, 0), out);
+
+   nir_alu_instr *vec = nir_src_as_alu_instr(&combined->src[1]);
+   ASSERT_TRUE(vec);
+
+   /* Component x comes from v[0]. */
+   nir_intrinsic_instr *load_for_x =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[0].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_x, 0), v[0]);
+   ASSERT_EQ(vec->src[0].swizzle[0], 0);
+
+   /* Component y comes from v[1]. */
+   nir_intrinsic_instr *load_for_y =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[1].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_y, 0), v[1]);
+   ASSERT_EQ(vec->src[1].swizzle[0], 1);
+
+   /* Components z and w come from v[2]. */
+   nir_intrinsic_instr *load_for_z =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[2].src));
+   nir_intrinsic_instr *load_for_w =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[3].src));
+   ASSERT_EQ(load_for_z, load_for_w);
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_z, 0), v[2]);
+   ASSERT_EQ(vec->src[2].swizzle[0], 2);
+   ASSERT_EQ(vec->src[3].swizzle[0], 3);
+}
+
+TEST_F(nir_combine_stores_test, direct_array_derefs)
+{
+   nir_variable **v = create_many_ivec4(nir_var_mem_ssbo, "vec", 2);
+   nir_variable **s = create_many_int(nir_var_mem_ssbo, "scalar", 2);
+   nir_variable *out = create_ivec4(nir_var_mem_ssbo, "out");
+
+   nir_deref_instr *out_deref = nir_build_deref_var(b, out);
+
+   /* Store to vector with mask x. */
+   nir_store_deref(b, out_deref, nir_load_var(b, v[0]),
+                   1 << 0);
+
+   /* Store to vector with mask yz. */
+   nir_store_deref(b, out_deref, nir_load_var(b, v[1]),
+                   (1 << 2) | (1 << 1));
+
+   /* Store to vector[2], overlapping with previous store. */
+   nir_store_deref(b,
+                   nir_build_deref_array_imm(b, out_deref, 2),
+                   nir_load_var(b, s[0]),
+                   1 << 0);
+
+   /* Store to vector[3], no overlap. */
+   nir_store_deref(b,
+                   nir_build_deref_array_imm(b, out_deref, 3),
+                   nir_load_var(b, s[1]),
+                   1 << 0);
+
+   nir_validate_shader(b->shader, NULL);
+
+   bool progress = nir_opt_combine_stores(b->shader, nir_var_mem_ssbo);
+   ASSERT_TRUE(progress);
+
+   nir_validate_shader(b->shader, NULL);
+
+   /* Clean up to verify from where the values in the combined store are coming. */
+   nir_copy_prop(b->shader);
+   nir_opt_dce(b->shader);
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+   nir_intrinsic_instr *combined = get_intrinsic(nir_intrinsic_store_deref, 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(combined), 0xf);
+   ASSERT_EQ(nir_intrinsic_get_var(combined, 0), out);
+
+   nir_alu_instr *vec = nir_src_as_alu_instr(&combined->src[1]);
+   ASSERT_TRUE(vec);
+
+   /* Component x comes from v[0]. */
+   nir_intrinsic_instr *load_for_x =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[0].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_x, 0), v[0]);
+   ASSERT_EQ(vec->src[0].swizzle[0], 0);
+
+   /* Component y comes from v[1]. */
+   nir_intrinsic_instr *load_for_y =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[1].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_y, 0), v[1]);
+   ASSERT_EQ(vec->src[1].swizzle[0], 1);
+
+   /* Component z comes from s[0]. */
+   nir_intrinsic_instr *load_for_z =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[2].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_z, 0), s[0]);
+   ASSERT_EQ(vec->src[2].swizzle[0], 0);
+
+   /* Component w comes from s[1]. */
+   nir_intrinsic_instr *load_for_w =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[3].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_w, 0), s[1]);
+   ASSERT_EQ(vec->src[3].swizzle[0], 0);
+}
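
One case the new tests leave uncovered is an indirect component index. Per the pass's header comment, such a store has unknown overlap, so it flushes the pending combination and is itself left untouched. A hypothetical follow-up test in the same style, not part of this patch (the expected store count here is derived from that flush-on-indirect behaviour), might look like:

TEST_F(nir_combine_stores_test, indirect_array_deref_blocks_combining)
{
   nir_variable **v = create_many_ivec4(nir_var_mem_ssbo, "v", 2);
   nir_variable **s = create_many_int(nir_var_mem_ssbo, "s", 2);
   nir_variable *out = create_ivec4(nir_var_mem_ssbo, "out");

   nir_deref_instr *out_deref = nir_build_deref_var(b, out);

   /* Two partial stores to the vector that the pass can combine... */
   nir_store_deref(b, out_deref, nir_load_var(b, v[0]), 1 << 0);
   nir_store_deref(b, out_deref, nir_load_var(b, v[1]), 1 << 1);

   /* ...followed by a store through a non-constant component index, which
    * must survive as a separate store. */
   nir_deref_instr *elem =
      nir_build_deref_array(b, out_deref, nir_load_var(b, s[1]));
   nir_store_deref(b, elem, nir_load_var(b, s[0]), 1);

   nir_validate_shader(b->shader, NULL);

   bool progress = nir_opt_combine_stores(b->shader, nir_var_mem_ssbo);
   ASSERT_TRUE(progress);

   nir_validate_shader(b->shader, NULL);

   /* The two partial stores are combined into one; the indirect store stays. */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
}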