summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCaio Marcelo de Oliveira Filho <[email protected]>2019-01-14 15:28:33 -0800
committerCaio Marcelo de Oliveira Filho <[email protected]>2019-02-28 23:50:05 -0800
commit96c32d77763c4b561f751ca360e6539a3c5e7f4d (patch)
tree6cedc24cdc0739c9b6fbcc7317520e6f53b0e9c6
parent33dafdc0248cce18ef923313656466dc15ff4c73 (diff)
nir/copy_prop_vars: handle load/store of vector elements
When a direct array deref is used on a vector type (for loads and stores), copy_prop_vars is now smart enough to propagate values it knows about. Given a 'vec4 v', storing to v[3] will update the copy entry for v; it is equivalent to a write to v.w. Loading from v[1] will first check whether there's a known value for v.y -- and drop the load in that case. The copy entries still always refer to the entire vectors, so the operations happen on the parent deref (the 'vector') and the values are fixed up accordingly. It may now be the case that certain entries not only have different SSA defs in each element, but also that those defs come from different components than the ones they are set to, because stores to individual elements always come from an SSA definition with a single component. Tests related to these cases are now enabled. v2: Instead of asserting on invalid indices, "load" an undef and remove the store. (Jason) v3: Merge code path for the cases of is_array_deref_of_vector into the regular code path. Add a base_index parameter to value_set_from_value. (code changes by Jason) v4: Removed the get_entry_for_deref helper, now being used only once. Reviewed-by: Jason Ekstrand <[email protected]>
-rw-r--r--src/compiler/nir/nir_opt_copy_prop_vars.c140
-rw-r--r--src/compiler/nir/tests/vars_tests.cpp8
2 files changed, 114 insertions, 34 deletions
diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c
index e59cd18b98d..b95a455a109 100644
--- a/src/compiler/nir/nir_opt_copy_prop_vars.c
+++ b/src/compiler/nir/nir_opt_copy_prop_vars.c
@@ -288,6 +288,15 @@ copy_entry_remove(struct util_dynarray *copies,
*entry = util_dynarray_pop(copies, struct copy_entry);
}
+static bool
+is_array_deref_of_vector(nir_deref_instr *deref)
+{
+ if (deref->deref_type != nir_deref_type_array)
+ return false;
+ nir_deref_instr *parent = nir_deref_instr_parent(deref);
+ return glsl_type_is_vector(parent->type);
+}
+
static struct copy_entry *
lookup_entry_for_deref(struct util_dynarray *copies,
nir_deref_instr *deref,
@@ -386,8 +395,11 @@ apply_barrier_for_modes(struct util_dynarray *copies,
static void
value_set_from_value(struct value *value, const struct value *from,
- unsigned write_mask)
+ unsigned base_index, unsigned write_mask)
{
+ /* We can't have non-zero indexes with non-trivial write masks */
+ assert(base_index == 0 || write_mask == 1);
+
if (from->is_ssa) {
/* Clear value if it was being used as non-SSA. */
if (!value->is_ssa)
@@ -396,8 +408,8 @@ value_set_from_value(struct value *value, const struct value *from,
/* Only overwrite the written components */
for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
if (write_mask & (1 << i)) {
- value->ssa.def[i] = from->ssa.def[i];
- value->ssa.component[i] = from->ssa.component[i];
+ value->ssa.def[base_index + i] = from->ssa.def[i];
+ value->ssa.component[base_index + i] = from->ssa.component[i];
}
}
} else {
@@ -407,6 +419,42 @@ value_set_from_value(struct value *value, const struct value *from,
}
}
+/* Try to load a single element of a vector from the copy_entry. If the data
+ * isn't available, just let the original intrinsic do the work.
+ */
+static bool
+load_element_from_ssa_entry_value(struct copy_prop_var_state *state,
+ struct copy_entry *entry,
+ nir_builder *b, nir_intrinsic_instr *intrin,
+ struct value *value, unsigned index)
+{
+ const struct glsl_type *type = entry->dst->type;
+ unsigned num_components = glsl_get_vector_elements(type);
+ assert(index < num_components);
+
+ /* We don't have the element available, so let the instruction do the work. */
+ if (!entry->src.ssa.def[index])
+ return false;
+
+ b->cursor = nir_instr_remove(&intrin->instr);
+ intrin->instr.block = NULL;
+
+ assert(entry->src.ssa.component[index] <
+ entry->src.ssa.def[index]->num_components);
+ nir_ssa_def *def = nir_channel(b, entry->src.ssa.def[index],
+ entry->src.ssa.component[index]);
+
+ *value = (struct value) {
+ .is_ssa = true,
+ .ssa = {
+ .def = { def },
+ .component = { 0 },
+ },
+ };
+
+ return true;
+}
+
/* Do a "load" from an SSA-based entry return it in "value" as a value with a
* single SSA def. Because an entry could reference multiple different SSA
* defs, a vecN operation may be inserted to combine them into a single SSA
@@ -419,8 +467,16 @@ static bool
load_from_ssa_entry_value(struct copy_prop_var_state *state,
struct copy_entry *entry,
nir_builder *b, nir_intrinsic_instr *intrin,
- struct value *value)
+ nir_deref_instr *src, struct value *value)
{
+ if (is_array_deref_of_vector(src)) {
+ if (!nir_src_is_const(src->arr.index))
+ return false;
+
+ return load_element_from_ssa_entry_value(state, entry, b, intrin, value,
+ nir_src_as_uint(src->arr.index));
+ }
+
*value = entry->src;
assert(value->is_ssa);
@@ -611,7 +667,7 @@ try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
return false;
if (entry->src.is_ssa) {
- return load_from_ssa_entry_value(state, entry, b, intrin, value);
+ return load_from_ssa_entry_value(state, entry, b, intrin, src, value);
} else {
return load_from_deref_entry_value(state, entry, b, intrin, src, value);
}
@@ -639,15 +695,6 @@ invalidate_copies_for_cf_node(struct copy_prop_var_state *state,
}
}
-static bool
-is_array_deref_of_vector(nir_deref_instr *deref)
-{
- if (deref->deref_type != nir_deref_type_array)
- return false;
- nir_deref_instr *parent = nir_deref_instr_parent(deref);
- return glsl_type_is_vector(parent->type);
-}
-
static void
print_value(struct value *value, unsigned num_components)
{
@@ -756,9 +803,25 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
+ int vec_index = 0;
+ nir_deref_instr *vec_src = src;
if (is_array_deref_of_vector(src)) {
- /* Not handled yet. This load won't invalidate existing copies. */
- break;
+ vec_src = nir_deref_instr_parent(src);
+ unsigned vec_comps = glsl_get_vector_elements(vec_src->type);
+
+ /* Indirects are not handled yet. */
+ if (!nir_src_is_const(src->arr.index))
+ break;
+
+ vec_index = nir_src_as_uint(src->arr.index);
+
+ /* Loading from an invalid index yields an undef */
+ if (vec_index >= vec_comps) {
+ b->cursor = nir_instr_remove(instr);
+ nir_ssa_def *u = nir_ssa_undef(b, 1, intrin->dest.ssa.bit_size);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(u));
+ break;
+ }
}
struct copy_entry *src_entry =
@@ -768,7 +831,8 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
if (value.is_ssa) {
/* lookup_load has already ensured that we get a single SSA
* value that has all of the channels. We just have to do the
- * rewrite operation.
+ * rewrite operation. Note for array derefs of vectors, the
+ * channel 0 is used.
*/
if (intrin->instr.block) {
/* The lookup left our instruction in-place. This means it
@@ -804,14 +868,14 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
* contains what we're looking for.
*/
struct copy_entry *entry =
- lookup_entry_for_deref(copies, src, nir_derefs_equal_bit);
+ lookup_entry_for_deref(copies, vec_src, nir_derefs_equal_bit);
if (!entry)
- entry = copy_entry_create(copies, src);
+ entry = copy_entry_create(copies, vec_src);
/* Update the entry with the value of the load. This way
* we can potentially remove subsequent loads.
*/
- value_set_from_value(&entry->src, &value,
+ value_set_from_value(&entry->src, &value, vec_index,
(1 << intrin->num_components) - 1);
break;
}
@@ -820,6 +884,29 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
if (debug) dump_instr(instr);
nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+ assert(glsl_type_is_vector_or_scalar(dst->type));
+
+ int vec_index = 0;
+ nir_deref_instr *vec_dst = dst;
+ if (is_array_deref_of_vector(dst)) {
+ vec_dst = nir_deref_instr_parent(dst);
+ unsigned vec_comps = glsl_get_vector_elements(vec_dst->type);
+
+ /* Indirects are not handled yet. Kill everything */
+ if (!nir_src_is_const(dst->arr.index)) {
+ kill_aliases(copies, vec_dst, (1 << vec_comps) - 1);
+ break;
+ }
+
+ vec_index = nir_src_as_uint(dst->arr.index);
+
+ /* Storing to an invalid index is a no-op. */
+ if (vec_index >= vec_comps) {
+ nir_instr_remove(instr);
+ break;
+ }
+ }
+
struct copy_entry *entry =
lookup_entry_for_deref(copies, dst, nir_derefs_equal_bit);
if (entry && value_equals_store_src(&entry->src, intrin)) {
@@ -827,21 +914,14 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
* store is redundant so remove it.
*/
nir_instr_remove(instr);
- } else if (is_array_deref_of_vector(dst)) {
- /* Not handled yet. Writing into an element of 'dst' invalidates
- * any related entries in copies.
- */
- nir_deref_instr *vector = nir_deref_instr_parent(dst);
- unsigned vector_components = glsl_get_vector_elements(vector->type);
- kill_aliases(copies, vector, (1 << vector_components) - 1);
} else {
struct value value = {0};
value_set_ssa_components(&value, intrin->src[1].ssa,
intrin->num_components);
unsigned wrmask = nir_intrinsic_write_mask(intrin);
struct copy_entry *entry =
- get_entry_and_kill_aliases(copies, dst, wrmask);
- value_set_from_value(&entry->src, &value, wrmask);
+ get_entry_and_kill_aliases(copies, vec_dst, wrmask);
+ value_set_from_value(&entry->src, &value, vec_index, wrmask);
}
break;
@@ -903,7 +983,7 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
struct copy_entry *dst_entry =
get_entry_and_kill_aliases(copies, dst, full_mask);
- value_set_from_value(&dst_entry->src, &value, full_mask);
+ value_set_from_value(&dst_entry->src, &value, 0, full_mask);
break;
}
diff --git a/src/compiler/nir/tests/vars_tests.cpp b/src/compiler/nir/tests/vars_tests.cpp
index e86b06dc4a9..03177a465d2 100644
--- a/src/compiler/nir/tests/vars_tests.cpp
+++ b/src/compiler/nir/tests/vars_tests.cpp
@@ -461,7 +461,7 @@ TEST_F(nir_copy_prop_vars_test, simple_store_load_in_two_blocks)
}
}
-TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_reuses_previous_load)
+TEST_F(nir_copy_prop_vars_test, load_direct_array_deref_on_vector_reuses_previous_load)
{
nir_variable *in0 = create_ivec2(nir_var_mem_ssbo, "in0");
nir_variable *in1 = create_ivec2(nir_var_mem_ssbo, "in1");
@@ -497,7 +497,7 @@ TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_reuse
ASSERT_TRUE(nir_src_as_alu_instr(&store->src[1]));
}
-TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_reuses_previous_copy)
+TEST_F(nir_copy_prop_vars_test, load_direct_array_deref_on_vector_reuses_previous_copy)
{
nir_variable *in0 = create_ivec2(nir_var_mem_ssbo, "in0");
nir_variable *vec = create_ivec2(nir_var_mem_ssbo, "vec");
@@ -521,7 +521,7 @@ TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_reuse
ASSERT_EQ(nir_intrinsic_get_var(load, 0), in0);
}
-TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_gets_reused)
+TEST_F(nir_copy_prop_vars_test, load_direct_array_deref_on_vector_gets_reused)
{
nir_variable *in0 = create_ivec2(nir_var_mem_ssbo, "in0");
nir_variable *vec = create_ivec2(nir_var_mem_ssbo, "vec");
@@ -555,7 +555,7 @@ TEST_F(nir_copy_prop_vars_test, DISABLED_load_direct_array_deref_on_vector_gets_
ASSERT_TRUE(nir_src_as_alu_instr(&store->src[1]));
}
-TEST_F(nir_copy_prop_vars_test, DISABLED_store_load_direct_array_deref_on_vector)
+TEST_F(nir_copy_prop_vars_test, store_load_direct_array_deref_on_vector)
{
nir_variable *vec = create_ivec2(nir_var_mem_ssbo, "vec");
nir_variable *out0 = create_int(nir_var_mem_ssbo, "out0");