Diffstat (limited to 'src/glsl/nir')
-rw-r--r--  src/glsl/nir/glsl_to_nir.cpp             | 145
-rw-r--r--  src/glsl/nir/nir_intrinsics.h            |  54
-rw-r--r--  src/glsl/nir/nir_lower_atomics.c         |   5
-rw-r--r--  src/glsl/nir/nir_lower_io.c              |   2
-rw-r--r--  src/glsl/nir/nir_lower_phis_to_scalar.c  |   5
-rw-r--r--  src/glsl/nir/nir_lower_samplers.cpp      |  55
-rw-r--r--  src/glsl/nir/nir_opt_algebraic.py        |   2
-rw-r--r--  src/glsl/nir/nir_opt_peephole_ffma.c     |  19
-rw-r--r--  src/glsl/nir/nir_opt_peephole_select.c   |   4
9 files changed, 228 insertions, 63 deletions
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 7c30be3fa72..0338af67567 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -65,6 +65,7 @@ public:
virtual void visit(ir_dereference_variable *);
virtual void visit(ir_dereference_record *);
virtual void visit(ir_dereference_array *);
+ virtual void visit(ir_barrier *);
void create_function(ir_function *ir);
@@ -615,27 +616,135 @@ nir_visitor::visit(ir_call *ir)
op = nir_intrinsic_atomic_counter_inc_var;
} else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) {
op = nir_intrinsic_atomic_counter_dec_var;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) {
+ op = nir_intrinsic_image_load;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) {
+ op = nir_intrinsic_image_store;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) {
+ op = nir_intrinsic_image_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) {
+ op = nir_intrinsic_image_atomic_min;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) {
+ op = nir_intrinsic_image_atomic_max;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) {
+ op = nir_intrinsic_image_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) {
+ op = nir_intrinsic_image_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) {
+ op = nir_intrinsic_image_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) {
+ op = nir_intrinsic_image_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) {
+ op = nir_intrinsic_image_atomic_comp_swap;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) {
+ op = nir_intrinsic_memory_barrier;
} else {
unreachable("not reached");
}
nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
- ir_dereference *param =
- (ir_dereference *) ir->actual_parameters.get_head();
- instr->variables[0] = evaluate_deref(&instr->instr, param);
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+
+ switch (op) {
+ case nir_intrinsic_atomic_counter_read_var:
+ case nir_intrinsic_atomic_counter_inc_var:
+ case nir_intrinsic_atomic_counter_dec_var: {
+ ir_dereference *param =
+ (ir_dereference *) ir->actual_parameters.get_head();
+ instr->variables[0] = evaluate_deref(&instr->instr, param);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ break;
+ }
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap: {
+ nir_ssa_undef_instr *instr_undef =
+ nir_ssa_undef_instr_create(shader, 1);
+ nir_instr_insert_after_cf_list(this->cf_node_list,
+ &instr_undef->instr);
+
+ /* Set the image variable dereference. */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_dereference *image = (ir_dereference *)param;
+ const glsl_type *type =
+ image->variable_referenced()->type->without_array();
+
+ instr->variables[0] = evaluate_deref(&instr->instr, image);
+ param = param->get_next();
+
+ /* Set the address argument, extending the coordinate vector to four
+ * components.
+ */
+ const nir_src src_addr = evaluate_rvalue((ir_dereference *)param);
+ nir_alu_instr *instr_addr = nir_alu_instr_create(shader, nir_op_vec4);
+ nir_ssa_dest_init(&instr_addr->instr, &instr_addr->dest.dest, 4, NULL);
+
+ for (int i = 0; i < 4; i++) {
+ if (i < type->coordinate_components()) {
+ instr_addr->src[i].src = src_addr;
+ instr_addr->src[i].swizzle[0] = i;
+ } else {
+ instr_addr->src[i].src = nir_src_for_ssa(&instr_undef->def);
+ }
+ }
+
+ nir_instr_insert_after_cf_list(cf_node_list, &instr_addr->instr);
+ instr->src[0] = nir_src_for_ssa(&instr_addr->dest.dest.ssa);
+ param = param->get_next();
+
+ /* Set the sample argument, which is undefined for single-sample
+ * images.
+ */
+ if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
+ instr->src[1] = evaluate_rvalue((ir_dereference *)param);
+ param = param->get_next();
+ } else {
+ instr->src[1] = nir_src_for_ssa(&instr_undef->def);
+ }
+
+ /* Set the intrinsic parameters. */
+ if (!param->is_tail_sentinel()) {
+ instr->src[2] = evaluate_rvalue((ir_dereference *)param);
+ param = param->get_next();
+ }
+
+ if (!param->is_tail_sentinel()) {
+ instr->src[3] = evaluate_rvalue((ir_dereference *)param);
+ param = param->get_next();
+ }
+
+ /* Set the intrinsic destination. */
+ if (ir->return_deref)
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ ir->return_deref->type->vector_elements, NULL);
+ break;
+ }
+ case nir_intrinsic_memory_barrier:
+ break;
+ default:
+ unreachable("not reached");
+ }
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
- nir_intrinsic_instr *store_instr =
- nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
- store_instr->num_components = 1;
+ if (ir->return_deref) {
+ nir_intrinsic_instr *store_instr =
+ nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
+ store_instr->num_components = ir->return_deref->type->vector_elements;
- store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref);
- store_instr->src[0].is_ssa = true;
- store_instr->src[0].ssa = &instr->dest.ssa;
+ store_instr->variables[0] =
+ evaluate_deref(&store_instr->instr, ir->return_deref);
+ store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa);
- nir_instr_insert_after_cf_list(this->cf_node_list, &store_instr->instr);
+ nir_instr_insert_after_cf_list(this->cf_node_list,
+ &store_instr->instr);
+ }
return;
}
@@ -823,13 +932,9 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
}
nir_dest *dest = get_instr_dest(this->result);
-
assert(dest->is_ssa);
- nir_src src = NIR_SRC_INIT;
- src.is_ssa = true;
- src.ssa = &dest->ssa;
- return src;
+ return nir_src_for_ssa(&dest->ssa);
}
nir_alu_instr *
@@ -1786,3 +1891,11 @@ nir_visitor::visit(ir_dereference_array *ir)
ralloc_steal(this->deref_tail, deref);
this->deref_tail = &deref->deref;
}
+
+void
+nir_visitor::visit(ir_barrier *ir)
+{
+ nir_intrinsic_instr *instr =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier);
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+}
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 8e28765c13a..bc6e6b8f498 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -67,7 +67,15 @@ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
*/
#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
+BARRIER(barrier)
BARRIER(discard)
+
+/*
+ * Memory barrier with semantics analogous to the memoryBarrier() GLSL
+ * intrinsic.
+ */
+BARRIER(memory_barrier)
+
/** A conditional discard, with a single boolean source. */
INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
@@ -89,6 +97,33 @@ ATOMIC(inc, 0)
ATOMIC(dec, 0)
ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
+/*
+ * Image load, store and atomic intrinsics.
+ *
+ * All image intrinsics take an image target passed as a nir_variable. Image
+ * variables contain a number of memory and layout qualifiers that influence
+ * the semantics of the intrinsic.
+ *
+ * All image intrinsics take a four-coordinate vector and a sample index as
+ * first two sources, determining the location within the image that will be
+ * accessed by the intrinsic. Components not applicable to the image target
+ * in use are undefined. Image store takes an additional four-component
+ * argument with the value to be written, and image atomic operations take
+ * either one or two additional scalar arguments with the same meaning as in
+ * the ARB_shader_image_load_store specification.
+ */
+INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0)
+INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
+
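[Editor's note: to make the source layout documented above concrete, here is a minimal illustrative sketch of how a pass might fill in an image_store intrinsic for a non-multisampled 2D image. The names shader, cf_node_list, image_deref (a nir_deref_var *), coord, sample_undef and value (nir_ssa_def *) are assumed to be provided by the caller; only the NIR helpers shown in the patch itself are used.

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(shader, nir_intrinsic_image_store);
   /* variables[0] holds the image variable dereference. */
   store->variables[0] = image_deref;
   /* src[0]: four-component address; components not used by the image
    * target are undefined. */
   store->src[0] = nir_src_for_ssa(coord);
   /* src[1]: sample index, undefined for single-sample images. */
   store->src[1] = nir_src_for_ssa(sample_undef);
   /* src[2]: four-component value to be written. */
   store->src[2] = nir_src_for_ssa(value);
   nir_instr_insert_after_cf_list(cf_node_list, &store->instr);
]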
#define SYSTEM_VALUE(name, components) \
INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
@@ -104,12 +139,11 @@ SYSTEM_VALUE(sample_mask_in, 1)
SYSTEM_VALUE(invocation_id, 1)
/*
- * The first index is the address to load from, and the second index is the
- * number of array elements to load. Indirect loads have an additional
- * register input, which is added to the constant address to compute the
- * final address to load from. For UBO's (and SSBO's), the first source is
- * the (possibly constant) UBO buffer index and the indirect (if it exists)
- * is the second source.
+ * The first and only index is the base address to load from. Indirect
+ * loads have an additional register input, which is added to the constant
+ * address to compute the final address to load from. For UBO's (and
+ * SSBO's), the first source is the (possibly constant) UBO buffer index
+ * and the indirect (if it exists) is the second source.
*
* For vector backends, the address is in terms of one vec4, and so each array
* element is +4 scalar components from the previous array element. For scalar
@@ -118,9 +152,9 @@ SYSTEM_VALUE(invocation_id, 1)
*/
#define LOAD(name, extra_srcs, flags) \
- INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
+ INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \
INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
- true, 0, 0, 2, flags)
+ true, 0, 0, 1, flags)
LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
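[Editor's note: as an illustration of the single-index scheme described above, a direct and an indirect uniform load would now be emitted roughly as follows. This is a hedged sketch, with shader, cf_node_list, base and offset_def assumed to exist in the caller.

   /* Direct load: the base address is the one and only constant index. */
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(shader, nir_intrinsic_load_uniform);
   load->num_components = 4;
   load->const_index[0] = base;
   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
   nir_instr_insert_after_cf_list(cf_node_list, &load->instr);

   /* Indirect load: same base index, plus a register source that gets
    * added to the base to form the final address. */
   nir_intrinsic_instr *load_ind =
      nir_intrinsic_instr_create(shader, nir_intrinsic_load_uniform_indirect);
   load_ind->num_components = 4;
   load_ind->const_index[0] = base;
   load_ind->src[0] = nir_src_for_ssa(offset_def);
   nir_ssa_dest_init(&load_ind->instr, &load_ind->dest, 4, NULL);
   nir_instr_insert_after_cf_list(cf_node_list, &load_ind->instr);
]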
@@ -138,7 +172,7 @@ LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
num_indices, flags) \
-STORE(output, 2, 0)
-/* STORE(ssbo, 3, 0) */
+STORE(output, 1, 0)
+/* STORE(ssbo, 2, 0) */
LAST_INTRINSIC(store_output_indirect)
diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c
index e82df016969..0457de60d9a 100644
--- a/src/glsl/nir/nir_lower_atomics.c
+++ b/src/glsl/nir/nir_lower_atomics.c
@@ -78,7 +78,8 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
nir_deref_as_array(instr->variables[0]->deref.child);
assert(deref_array->deref.child == NULL);
- offset_const->value.u[0] += deref_array->base_offset;
+ offset_const->value.u[0] +=
+ deref_array->base_offset * ATOMIC_COUNTER_SIZE;
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
nir_load_const_instr *atomic_counter_size =
@@ -108,7 +109,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
}
new_instr->src[0].is_ssa = true;
- new_instr->src[0].ssa = offset_def;;
+ new_instr->src[0].ssa = offset_def;
if (instr->dest.is_ssa) {
nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 561bebd3a9c..4c59298ecb7 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -289,7 +289,6 @@ nir_lower_io_block(nir_block *block, void *void_state)
offset += intrin->variables[0]->var->data.driver_location;
load->const_index[0] = offset;
- load->const_index[1] = 1;
if (has_indirect)
load->src[0] = indirect;
@@ -332,7 +331,6 @@ nir_lower_io_block(nir_block *block, void *void_state)
offset += intrin->variables[0]->var->data.driver_location;
store->const_index[0] = offset;
- store->const_index[1] = 1;
nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c
index 4bdb80072ab..a57d253975d 100644
--- a/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -153,6 +153,11 @@ should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state)
break;
}
+ /* The hash table entry for 'phi' may have changed while recursing the
+ * dependence graph, so we need to reset it. */
+ entry = _mesa_hash_table_search(state->phi_table, phi);
+ assert(entry);
+
entry->data = (void *)(intptr_t)scalarizable;
return scalarizable;
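[Editor's note: the hazard being fixed here is the usual one with rehashing hash tables: a hash_entry pointer obtained before a recursive call is stale if the recursion inserted entries and the table got resized. A minimal sketch of the pattern, where only _mesa_hash_table_search is a real API and table, key, visit_deps and result are hypothetical names:

   struct hash_entry *entry = _mesa_hash_table_search(table, key);
   /* visit_deps() may insert into 'table' and trigger a rehash, which
    * invalidates any previously obtained entry pointers. */
   visit_deps(table);
   entry = _mesa_hash_table_search(table, key); /* look the entry up again */
   entry->data = result;
]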
diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp
index 6ed5a4cb2b5..9a9cdd16a9a 100644
--- a/src/glsl/nir/nir_lower_samplers.cpp
+++ b/src/glsl/nir/nir_lower_samplers.cpp
@@ -94,34 +94,45 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr
case nir_deref_type_array: {
nir_deref_array *deref_array = nir_deref_as_array(deref->child);
+ assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+
+ if (deref_array->deref.child) {
+ ralloc_asprintf_append(&name, "[%u]",
+ deref_array->deref_array_type == nir_deref_array_type_direct ?
+ deref_array->base_offset : 0);
+ } else {
+ assert(deref->child->type->base_type == GLSL_TYPE_SAMPLER);
+ instr->sampler_index = deref_array->base_offset;
+ }
+
/* XXX: We're assuming here that the indirect is the last array
* thing we have. This should be ok for now as we don't support
* arrays_of_arrays yet.
*/
-
- instr->sampler_index *= glsl_get_length(deref->type);
- switch (deref_array->deref_array_type) {
- case nir_deref_array_type_direct:
- instr->sampler_index += deref_array->base_offset;
- if (deref_array->deref.child)
- ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset);
- break;
- case nir_deref_array_type_indirect: {
- add_indirect_to_tex(instr, deref_array->indirect);
- nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
- NIR_SRC_INIT);
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ /* First, we have to resize the array of texture sources */
+ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+ instr->num_srcs + 1);
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ new_srcs[i].src_type = instr->src[i].src_type;
+ nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+ &instr->src[i].src);
+ }
+
+ ralloc_free(instr->src);
+ instr->src = new_srcs;
+
+ /* Now we can go ahead and move the source over to being a
+ * first-class texture source.
+ */
+ instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+ instr->num_srcs++;
+ nir_instr_move_src(&instr->instr,
+ &instr->src[instr->num_srcs - 1].src,
+ &deref_array->indirect);
instr->sampler_array_size = glsl_get_length(deref->type);
-
- if (deref_array->deref.child)
- ralloc_strcat(&name, "[0]");
- break;
- }
-
- case nir_deref_array_type_wildcard:
- unreachable("Cannot copy samplers");
- default:
- unreachable("Invalid deref array type");
}
break;
}
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index fa039222fd2..eace791f5b0 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -156,6 +156,8 @@ optimizations = [
(('fpow', a, 2.0), ('fmul', a, a)),
(('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
(('fpow', 2.0, a), ('fexp2', a)),
+ (('fpow', ('fpow', a, 2.2), 0.454545), a),
+ (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
(('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
(('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
(('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
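[Editor's note: the two new fpow patterns work because 0.454545 is, to six decimals, the reciprocal of 2.2: 2.2 * 0.454545 = 0.999999, so for non-negative a, pow(pow(a, 2.2), 0.454545) = pow(a, 0.999999) ~= a. That collapses a gamma encode/decode round trip (exponent 2.2 followed by ~1/2.2) into a plain copy; the second pattern handles the common variant where the intermediate value is wrapped in fabs, in which case the result is fabs(a).]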
diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c
index b430eac8eab..798506b7595 100644
--- a/src/glsl/nir/nir_opt_peephole_ffma.c
+++ b/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -73,7 +73,8 @@ are_all_uses_fadd(nir_ssa_def *def)
}
static nir_alu_instr *
-get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
+get_mul_for_src(nir_alu_src *src, int num_components,
+ uint8_t swizzle[4], bool *negate, bool *abs)
{
assert(src->src.is_ssa && !src->abs && !src->negate);
@@ -85,16 +86,16 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
switch (alu->op) {
case nir_op_imov:
case nir_op_fmov:
- alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
break;
case nir_op_fneg:
- alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
*negate = !*negate;
break;
case nir_op_fabs:
- alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
*negate = false;
*abs = true;
break;
@@ -115,12 +116,8 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
if (!alu)
return NULL;
- for (unsigned i = 0; i < 4; i++) {
- if (!(alu->dest.write_mask & (1 << i)))
- break;
-
+ for (unsigned i = 0; i < num_components; i++)
swizzle[i] = swizzle[src->swizzle[i]];
- }
return alu;
}
@@ -160,7 +157,9 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
negate = false;
abs = false;
- mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs);
+ mul = get_mul_for_src(&add->src[add_mul_src],
+ add->dest.dest.ssa.num_components,
+ swizzle, &negate, &abs);
if (mul != NULL)
break;
diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c
index 82c65bb442f..ef7c9775aa3 100644
--- a/src/glsl/nir/nir_opt_peephole_select.c
+++ b/src/glsl/nir/nir_opt_peephole_select.c
@@ -86,7 +86,9 @@ block_check_for_allowed_instrs(nir_block *block)
nir_alu_instr *mov = nir_instr_as_alu(instr);
if (mov->op != nir_op_fmov && mov->op != nir_op_imov &&
mov->op != nir_op_fneg && mov->op != nir_op_ineg &&
- mov->op != nir_op_fabs && mov->op != nir_op_iabs)
+ mov->op != nir_op_fabs && mov->op != nir_op_iabs &&
+ mov->op != nir_op_vec2 && mov->op != nir_op_vec3 &&
+ mov->op != nir_op_vec4)
return false;
/* Can't handle saturate */