summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2015-11-17 00:26:37 -0800
committerKenneth Graunke <[email protected]>2015-12-22 15:57:59 -0800
commit7d539080c1a491aff9fb3e90c25df89884477aa8 (patch)
tree066f321f1cef0a6a1578a4490c63ef9e7d5e1259 /src
parent50fc4a925644378c50282004304bc8fd64b95e3c (diff)
nir: Add a writemask to store intrinsics.
Tessellation control shaders need to be careful when writing outputs. Because multiple threads can concurrently write the same output variables, we need to only write the exact components we were told. Traditionally, for sub-vector writes, we've read the whole vector, updated the temporary, and written the whole vector back. This breaks down with concurrent access. This patch prepares the way for a solution by adding a writemask field to store_var intrinsics, as well as the other store intrinsics. It then updates all produces to emit a writemask of "all channels enabled". It updates nir_lower_io to copy the writemask to output store intrinsics. Finally, it updates nir_lower_vars_to_ssa to handle partial writemasks by doing a read-modify-write cycle (which is safe, because local variables are specific to a single thread). This should have no functional change, since no one actually emits partial writemasks yet. v2: Make nir_validate momentarily assert that writemasks cover the complete value - we shouldn't have partial writemasks yet (requested by Jason Ekstrand). v3: Fix accidental SSBO change that arose from merge conflicts. v4: Don't try to handle writemasks in ir3_compiler_nir - my code for indirects was likely wrong, and TTN doesn't generate partial writemasks today anyway. Change them to asserts as requested by Rob Clark. Signed-off-by: Kenneth Graunke <[email protected]> Reviewed-by: Jason Ekstrand <[email protected]> [v3]
Diffstat (limited to 'src')
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.c1
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c8
-rw-r--r--src/glsl/nir/glsl_to_nir.cpp2
-rw-r--r--src/glsl/nir/nir_builder.h4
-rw-r--r--src/glsl/nir/nir_intrinsics.h10
-rw-r--r--src/glsl/nir/nir_lower_gs_intrinsics.c3
-rw-r--r--src/glsl/nir/nir_lower_io.c3
-rw-r--r--src/glsl/nir/nir_lower_locals_to_regs.c2
-rw-r--r--src/glsl/nir/nir_lower_var_copies.c1
-rw-r--r--src/glsl/nir/nir_lower_vars_to_ssa.c46
-rw-r--r--src/glsl/nir/nir_validate.c2
-rw-r--r--src/mesa/program/prog_to_nir.c2
12 files changed, 65 insertions, 19 deletions
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 80821572f56..56476d4f54e 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1903,6 +1903,7 @@ ttn_emit_instruction(struct ttn_compile *c)
&tgsi_dst->Indirect : NULL;
store->num_components = 4;
+ store->const_index[0] = 0xf;
store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index eea5c5e28db..1794059342b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1321,6 +1321,10 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
case nir_deref_array_type_direct:
/* direct access does not require anything special: */
for (int i = 0; i < intr->num_components; i++) {
+ /* ttn doesn't generate partial writemasks */
+ assert(intr->const_index[0] ==
+ (1 << intr->num_components) - 1);
+
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
arr->arr[n] = src[i];
@@ -1333,6 +1337,10 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *addr =
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
for (int i = 0; i < intr->num_components; i++) {
+ /* ttn doesn't generate partial writemasks */
+ assert(intr->const_index[0] ==
+ (1 << intr->num_components) - 1);
+
struct ir3_instruction *store;
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index df8a18a7f49..c0de7c4af98 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1067,6 +1067,7 @@ nir_visitor::visit(ir_call *ir)
nir_intrinsic_instr *store_instr =
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store_instr->num_components = ir->return_deref->type->vector_elements;
+ store_instr->const_index[0] = (1 << store_instr->num_components) - 1;
store_instr->variables[0] =
evaluate_deref(&store_instr->instr, ir->return_deref);
@@ -1165,6 +1166,7 @@ nir_visitor::visit(ir_assignment *ir)
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
store->num_components = ir->lhs->type->vector_elements;
+ store->const_index[0] = (1 << store->num_components) - 1;
nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
store->variables[0] = nir_deref_as_var(store_deref);
store->src[0] = nir_src_for_ssa(src);
diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index 332bb0246df..5883d86e907 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -310,13 +310,15 @@ nir_load_var(nir_builder *build, nir_variable *var)
}
static inline void
-nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
+nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
+ unsigned writemask)
{
const unsigned num_components = glsl_get_vector_elements(var->type);
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
store->num_components = num_components;
+ store->const_index[0] = writemask;
store->variables[0] = nir_deref_var_create(store, var);
store->src[0] = nir_src_for_ssa(value);
nir_builder_instr_insert(build, &store->instr);
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index ec9e845574a..5815dbecb68 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -43,7 +43,7 @@
INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
/*
@@ -302,10 +302,10 @@ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
#define STORE(name, srcs, indices, flags) \
INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
-/* src[] = { value, offset }. const_index[] = { base } */
-STORE(output, 2, 1, 0)
-/* src[] = { value, vertex, offset }. const_index[] = { base } */
-STORE(per_vertex_output, 3, 1, 0)
+/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+STORE(output, 2, 2, 0)
+/* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */
+STORE(per_vertex_output, 3, 2, 0)
/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
STORE(ssbo, 3, 1, 0)
/* src[] = { value, offset }. const_index[] = { base, write_mask } */
diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c b/src/glsl/nir/nir_lower_gs_intrinsics.c
index e0d067885d8..13254599088 100644
--- a/src/glsl/nir/nir_lower_gs_intrinsics.c
+++ b/src/glsl/nir/nir_lower_gs_intrinsics.c
@@ -99,7 +99,8 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
/* Increment the vertex count by 1 */
nir_store_var(b, state->vertex_count_var,
- nir_iadd(b, count, nir_imm_int(b, 1)));
+ nir_iadd(b, count, nir_imm_int(b, 1)),
+ 0x1); /* .x */
nir_instr_remove(&intrin->instr);
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 3d646eb14b4..a3565cc52ea 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -261,6 +261,9 @@ nir_lower_io_block(nir_block *block, void *void_state)
store->const_index[0] =
intrin->variables[0]->var->data.driver_location;
+ /* Copy the writemask */
+ store->const_index[1] = intrin->const_index[0];
+
if (per_vertex)
store->src[1] = nir_src_for_ssa(vertex_index);
diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c
index 17b53ca36f3..3e21ac0cdd5 100644
--- a/src/glsl/nir/nir_lower_locals_to_regs.c
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c
@@ -243,7 +243,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
- mov->dest.write_mask = (1 << intrin->num_components) - 1;
+ mov->dest.write_mask = intrin->const_index[0];
mov->dest.dest.is_ssa = false;
mov->dest.dest.reg.reg = reg_src.reg.reg;
mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
diff --git a/src/glsl/nir/nir_lower_var_copies.c b/src/glsl/nir/nir_lower_var_copies.c
index 98c107aa50e..a9017de5449 100644
--- a/src/glsl/nir/nir_lower_var_copies.c
+++ b/src/glsl/nir/nir_lower_var_copies.c
@@ -128,6 +128,7 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
store->num_components = num_components;
+ store->const_index[0] = (1 << num_components) - 1;
store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
store->src[0].is_ssa = true;
diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c
index e670dbdc7e7..3ec0e1d9960 100644
--- a/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -26,6 +26,7 @@
*/
#include "nir.h"
+#include "nir_builder.h"
#include "nir_vla.h"
@@ -590,6 +591,9 @@ add_phi_sources(nir_block *block, nir_block *pred,
static bool
rename_variables_block(nir_block *block, struct lower_variables_state *state)
{
+ nir_builder b;
+ nir_builder_init(&b, state->impl);
+
nir_foreach_instr_safe(block, instr) {
if (instr->type == nir_instr_type_phi) {
nir_phi_instr *phi = nir_instr_as_phi(instr);
@@ -675,20 +679,40 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
assert(intrin->src[0].is_ssa);
- nir_alu_instr *mov = nir_alu_instr_create(state->shader,
- nir_op_imov);
- mov->src[0].src.is_ssa = true;
- mov->src[0].src.ssa = intrin->src[0].ssa;
- for (unsigned i = intrin->num_components; i < 4; i++)
- mov->src[0].swizzle[i] = 0;
+ nir_ssa_def *new_def;
+ b.cursor = nir_before_instr(&intrin->instr);
- mov->dest.write_mask = (1 << intrin->num_components) - 1;
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- intrin->num_components, NULL);
+ if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
+ /* Whole variable store - just copy the source. Note that
+ * intrin->num_components and intrin->src[0].ssa->num_components
+ * may differ.
+ */
+ unsigned swiz[4];
+ for (unsigned i = 0; i < 4; i++)
+ swiz[i] = i < intrin->num_components ? i : 0;
+
+ new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
+ intrin->num_components, false);
+ } else {
+ nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state);
+ /* For writemasked store_var intrinsics, we combine the newly
+ * written values with the existing contents of unwritten
+ * channels, creating a new SSA value for the whole vector.
+ */
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < intrin->num_components; i++) {
+ if (intrin->const_index[0] & (1 << i)) {
+ srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
+ } else {
+ srcs[i] = nir_channel(&b, old_def, i);
+ }
+ }
+ new_def = nir_vec(&b, srcs, intrin->num_components);
+ }
- nir_instr_insert_before(&intrin->instr, &mov->instr);
+ assert(new_def->num_components == intrin->num_components);
- def_stack_push(node, &mov->dest.dest.ssa, state);
+ def_stack_push(node, new_def, state);
/* We'll wait to remove the instruction until the next pass
* where we pop the node we just pushed back off the stack.
diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index 06879d64ee2..89cf0b89208 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -417,6 +417,8 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
instr->variables[0]->var->data.mode != nir_var_uniform &&
instr->variables[0]->var->data.mode != nir_var_shader_storage);
+ /* Currently, writemasks must cover the entire value */
+ assert(instr->const_index[0] == (1 << instr->num_components) - 1);
break;
}
case nir_intrinsic_copy_var:
diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index ff671592763..14a339cf96e 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -927,6 +927,7 @@ ptn_add_output_stores(struct ptn_compile *c)
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
store->num_components = glsl_get_vector_elements(var->type);
+ store->const_index[0] = (1 << store->num_components) - 1;
store->variables[0] =
nir_deref_var_create(store, c->output_vars[var->data.location]);
@@ -997,6 +998,7 @@ setup_registers_and_variables(struct ptn_compile *c)
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store->num_components = 4;
+ store->const_index[0] = WRITEMASK_XYZW;
store->variables[0] = nir_deref_var_create(store, fullvar);
store->src[0] = nir_src_for_ssa(f001);
nir_builder_instr_insert(b, &store->instr);