summary refs log tree commit diff stats
path: root/src/compiler/nir
diff options
context:
space:
mode:
authorRhys Perry <[email protected]>2019-04-09 17:28:13 +0100
committerJason Ekstrand <[email protected]>2019-08-08 12:10:39 -0500
commit8bd2e138f5db8ab6f82ec7597edb946d84edd3bf (patch)
tree4159de089b1f3722776a6485cc7325147a7b031a /src/compiler/nir
parent1e21bb41233c4c4abe95f734f815a7e415ad7e19 (diff)
nir/lower_explicit_io: add nir_var_mem_shared support
v2: require nir_address_format_32bit_offset instead
v3: don't call nir_intrinsic_set_access() for shared atomics
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/compiler/nir')
-rw-r--r--src/compiler/nir/nir_lower_io.c77
1 file changed, 52 insertions, 25 deletions
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 36877fc0346..f8bcf4420e0 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -93,6 +93,31 @@ global_atomic_for_deref(nir_intrinsic_op deref_op)
}
}
+static nir_intrinsic_op
+shared_atomic_for_deref(nir_intrinsic_op deref_op)
+{
+ switch (deref_op) {
+#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
+ OP(atomic_exchange)
+ OP(atomic_comp_swap)
+ OP(atomic_add)
+ OP(atomic_imin)
+ OP(atomic_umin)
+ OP(atomic_imax)
+ OP(atomic_umax)
+ OP(atomic_and)
+ OP(atomic_or)
+ OP(atomic_xor)
+ OP(atomic_fadd)
+ OP(atomic_fmin)
+ OP(atomic_fmax)
+ OP(atomic_fcomp_swap)
+#undef OP
+ default:
+ unreachable("Invalid shared atomic");
+ }
+}
+
void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
int (*type_size)(const struct glsl_type *, bool))
@@ -427,27 +452,7 @@ lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
nir_builder *b = &state->builder;
assert(var->data.mode == nir_var_mem_shared);
- nir_intrinsic_op op;
- switch (intrin->intrinsic) {
-#define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break;
- OP(atomic_exchange)
- OP(atomic_comp_swap)
- OP(atomic_add)
- OP(atomic_imin)
- OP(atomic_umin)
- OP(atomic_imax)
- OP(atomic_umax)
- OP(atomic_and)
- OP(atomic_or)
- OP(atomic_xor)
- OP(atomic_fadd)
- OP(atomic_fmin)
- OP(atomic_fmax)
- OP(atomic_fcomp_swap)
-#undef OP
- default:
- unreachable("Invalid atomic");
- }
+ nir_intrinsic_op op = shared_atomic_for_deref(intrin->intrinsic);
nir_intrinsic_instr *atomic =
nir_intrinsic_instr_create(state->builder.shader, op);
@@ -849,6 +854,10 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
assert(addr_format_is_global(addr_format));
op = nir_intrinsic_load_kernel_input;
break;
+ case nir_var_mem_shared:
+ assert(addr_format == nir_address_format_32bit_offset);
+ op = nir_intrinsic_load_shared;
+ break;
default:
unreachable("Unsupported explicit IO variable mode");
}
@@ -857,12 +866,15 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
if (addr_format_is_global(addr_format)) {
load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
+ } else if (addr_format == nir_address_format_32bit_offset) {
+ assert(addr->num_components == 1);
+ load->src[0] = nir_src_for_ssa(addr);
} else {
load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
}
- if (mode != nir_var_mem_ubo && mode != nir_var_shader_in)
+ if (mode != nir_var_mem_ubo && mode != nir_var_shader_in && mode != nir_var_mem_shared)
nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
/* TODO: We should try and provide a better alignment. For OpenCL, we need
@@ -919,6 +931,10 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
assert(addr_format_is_global(addr_format));
op = nir_intrinsic_store_global;
break;
+ case nir_var_mem_shared:
+ assert(addr_format == nir_address_format_32bit_offset);
+ op = nir_intrinsic_store_shared;
+ break;
default:
unreachable("Unsupported explicit IO variable mode");
}
@@ -928,6 +944,9 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
store->src[0] = nir_src_for_ssa(value);
if (addr_format_is_global(addr_format)) {
store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
+ } else if (addr_format == nir_address_format_32bit_offset) {
+ assert(addr->num_components == 1);
+ store->src[1] = nir_src_for_ssa(addr);
} else {
store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
@@ -935,7 +954,8 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
nir_intrinsic_set_write_mask(store, write_mask);
- nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
+ if (mode != nir_var_mem_shared)
+ nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
/* TODO: We should try and provide a better alignment. For OpenCL, we need
* to plumb the alignment through from SPIR-V when we have one.
@@ -980,6 +1000,10 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
assert(addr_format_is_global(addr_format));
op = global_atomic_for_deref(intrin->intrinsic);
break;
+ case nir_var_mem_shared:
+ assert(addr_format == nir_address_format_32bit_offset);
+ op = shared_atomic_for_deref(intrin->intrinsic);
+ break;
default:
unreachable("Unsupported explicit IO variable mode");
}
@@ -989,6 +1013,9 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
unsigned src = 0;
if (addr_format_is_global(addr_format)) {
atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
+ } else if (addr_format == nir_address_format_32bit_offset) {
+ assert(addr->num_components == 1);
+ atomic->src[src++] = nir_src_for_ssa(addr);
} else {
atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
@@ -1000,7 +1027,7 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
/* Global atomics don't have access flags because they assume that the
* address may be non-uniform.
*/
- if (!addr_format_is_global(addr_format))
+ if (!addr_format_is_global(addr_format) && mode != nir_var_mem_shared)
nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
assert(intrin->dest.ssa.num_components == 1);
@@ -1032,7 +1059,7 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
assert(deref->dest.is_ssa);
switch (deref->deref_type) {
case nir_deref_type_var:
- assert(deref->mode == nir_var_shader_in);
+ assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared));
return nir_imm_intN_t(b, deref->var->data.driver_location,
deref->dest.ssa.bit_size);