diff options
Diffstat (limited to 'src/glsl/nir')
-rw-r--r-- | src/glsl/nir/glsl_to_nir.cpp | 145 | ||||
-rw-r--r-- | src/glsl/nir/nir_intrinsics.h | 54 | ||||
-rw-r--r-- | src/glsl/nir/nir_lower_atomics.c | 5 | ||||
-rw-r--r-- | src/glsl/nir/nir_lower_io.c | 2 | ||||
-rw-r--r-- | src/glsl/nir/nir_lower_phis_to_scalar.c | 5 | ||||
-rw-r--r-- | src/glsl/nir/nir_lower_samplers.cpp | 55 | ||||
-rw-r--r-- | src/glsl/nir/nir_opt_algebraic.py | 2 | ||||
-rw-r--r-- | src/glsl/nir/nir_opt_peephole_ffma.c | 19 | ||||
-rw-r--r-- | src/glsl/nir/nir_opt_peephole_select.c | 4 |
9 files changed, 228 insertions, 63 deletions
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 7c30be3fa72..0338af67567 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -65,6 +65,7 @@ public: virtual void visit(ir_dereference_variable *); virtual void visit(ir_dereference_record *); virtual void visit(ir_dereference_array *); + virtual void visit(ir_barrier *); void create_function(ir_function *ir); @@ -615,27 +616,135 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_atomic_counter_inc_var; } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) { op = nir_intrinsic_atomic_counter_dec_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) { + op = nir_intrinsic_image_load; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) { + op = nir_intrinsic_image_store; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) { + op = nir_intrinsic_image_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) { + op = nir_intrinsic_image_atomic_min; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) { + op = nir_intrinsic_image_atomic_max; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) { + op = nir_intrinsic_image_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) { + op = nir_intrinsic_image_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) { + op = nir_intrinsic_image_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) { + op = nir_intrinsic_image_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) { + op = nir_intrinsic_image_atomic_comp_swap; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) { + op = nir_intrinsic_memory_barrier; } else { unreachable("not reached"); } nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); - ir_dereference *param = - (ir_dereference *) ir->actual_parameters.get_head(); - instr->variables[0] = evaluate_deref(&instr->instr, param); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + + switch (op) { + case nir_intrinsic_atomic_counter_read_var: + case nir_intrinsic_atomic_counter_inc_var: + case nir_intrinsic_atomic_counter_dec_var: { + ir_dereference *param = + (ir_dereference *) ir->actual_parameters.get_head(); + instr->variables[0] = evaluate_deref(&instr->instr, param); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + break; + } + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: { + nir_ssa_undef_instr *instr_undef = + nir_ssa_undef_instr_create(shader, 1); + nir_instr_insert_after_cf_list(this->cf_node_list, + &instr_undef->instr); + + /* Set the image variable dereference. */ + exec_node *param = ir->actual_parameters.get_head(); + ir_dereference *image = (ir_dereference *)param; + const glsl_type *type = + image->variable_referenced()->type->without_array(); + + instr->variables[0] = evaluate_deref(&instr->instr, image); + param = param->get_next(); + + /* Set the address argument, extending the coordinate vector to four + * components. + */ + const nir_src src_addr = evaluate_rvalue((ir_dereference *)param); + nir_alu_instr *instr_addr = nir_alu_instr_create(shader, nir_op_vec4); + nir_ssa_dest_init(&instr_addr->instr, &instr_addr->dest.dest, 4, NULL); + + for (int i = 0; i < 4; i++) { + if (i < type->coordinate_components()) { + instr_addr->src[i].src = src_addr; + instr_addr->src[i].swizzle[0] = i; + } else { + instr_addr->src[i].src = nir_src_for_ssa(&instr_undef->def); + } + } + + nir_instr_insert_after_cf_list(cf_node_list, &instr_addr->instr); + instr->src[0] = nir_src_for_ssa(&instr_addr->dest.dest.ssa); + param = param->get_next(); + + /* Set the sample argument, which is undefined for single-sample + * images. + */ + if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { + instr->src[1] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } else { + instr->src[1] = nir_src_for_ssa(&instr_undef->def); + } + + /* Set the intrinsic parameters. */ + if (!param->is_tail_sentinel()) { + instr->src[2] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } + + if (!param->is_tail_sentinel()) { + instr->src[3] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } + + /* Set the intrinsic destination. */ + if (ir->return_deref) + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + break; + } + case nir_intrinsic_memory_barrier: + break; + default: + unreachable("not reached"); + } nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); - nir_intrinsic_instr *store_instr = - nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); - store_instr->num_components = 1; + if (ir->return_deref) { + nir_intrinsic_instr *store_instr = + nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); + store_instr->num_components = ir->return_deref->type->vector_elements; - store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref); - store_instr->src[0].is_ssa = true; - store_instr->src[0].ssa = &instr->dest.ssa; + store_instr->variables[0] = + evaluate_deref(&store_instr->instr, ir->return_deref); + store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa); - nir_instr_insert_after_cf_list(this->cf_node_list, &store_instr->instr); + nir_instr_insert_after_cf_list(this->cf_node_list, + &store_instr->instr); + } return; } @@ -823,13 +932,9 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir) } nir_dest *dest = get_instr_dest(this->result); - assert(dest->is_ssa); - nir_src src = NIR_SRC_INIT; - src.is_ssa = true; - src.ssa = &dest->ssa; - return src; + return nir_src_for_ssa(&dest->ssa); } nir_alu_instr * @@ -1786,3 +1891,11 @@ nir_visitor::visit(ir_dereference_array *ir) ralloc_steal(this->deref_tail, deref); this->deref_tail = &deref->deref; } + +void +nir_visitor::visit(ir_barrier *ir) +{ + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier); + nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); +} diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 8e28765c13a..bc6e6b8f498 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -67,7 +67,15 @@ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, */ #define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0) +BARRIER(barrier) BARRIER(discard) + +/* + * Memory barrier with semantics analogous to the memoryBarrier() GLSL + * intrinsic. + */ +BARRIER(memory_barrier) + /** A conditional discard, with a single boolean source. */ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) @@ -89,6 +97,33 @@ ATOMIC(inc, 0) ATOMIC(dec, 0) ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE) +/* + * Image load, store and atomic intrinsics. + * + * All image intrinsics take an image target passed as a nir_variable. Image + * variables contain a number of memory and layout qualifiers that influence + * the semantics of the intrinsic. + * + * All image intrinsics take a four-coordinate vector and a sample index as + * first two sources, determining the location within the image that will be + * accessed by the intrinsic. Components not applicable to the image target + * in use are undefined. Image store takes an additional four-component + * argument with the value to be written, and image atomic operations take + * either one or two additional scalar arguments with the same meaning as in + * the ARB_shader_image_load_store specification. + */ +INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0) +INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0) + #define SYSTEM_VALUE(name, components) \ INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) @@ -104,12 +139,11 @@ SYSTEM_VALUE(sample_mask_in, 1) SYSTEM_VALUE(invocation_id, 1) /* - * The first index is the address to load from, and the second index is the - * number of array elements to load. Indirect loads have an additional - * register input, which is added to the constant address to compute the - * final address to load from. For UBO's (and SSBO's), the first source is - * the (possibly constant) UBO buffer index and the indirect (if it exists) - * is the second source. + * The first and only index is the base address to load from. Indirect + * loads have an additional register input, which is added to the constant + * address to compute the final address to load from. For UBO's (and + * SSBO's), the first source is the (possibly constant) UBO buffer index + * and the indirect (if it exists) is the second source. * * For vector backends, the address is in terms of one vec4, and so each array * element is +4 scalar components from the previous array element. For scalar @@ -118,9 +152,9 @@ SYSTEM_VALUE(invocation_id, 1) */ #define LOAD(name, extra_srcs, flags) \ - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \ + INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \ INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ - true, 0, 0, 2, flags) + true, 0, 0, 1, flags) LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) @@ -138,7 +172,7 @@ LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \ num_indices, flags) \ -STORE(output, 2, 0) -/* STORE(ssbo, 3, 0) */ +STORE(output, 1, 0) +/* STORE(ssbo, 2, 0) */ LAST_INTRINSIC(store_output_indirect) diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c index e82df016969..0457de60d9a 100644 --- a/src/glsl/nir/nir_lower_atomics.c +++ b/src/glsl/nir/nir_lower_atomics.c @@ -78,7 +78,8 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) nir_deref_as_array(instr->variables[0]->deref.child); assert(deref_array->deref.child == NULL); - offset_const->value.u[0] += deref_array->base_offset; + offset_const->value.u[0] += + deref_array->base_offset * ATOMIC_COUNTER_SIZE; if (deref_array->deref_array_type == nir_deref_array_type_indirect) { nir_load_const_instr *atomic_counter_size = @@ -108,7 +109,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) } new_instr->src[0].is_ssa = true; - new_instr->src[0].ssa = offset_def;; + new_instr->src[0].ssa = offset_def; if (instr->dest.is_ssa) { nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 561bebd3a9c..4c59298ecb7 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -289,7 +289,6 @@ nir_lower_io_block(nir_block *block, void *void_state) offset += intrin->variables[0]->var->data.driver_location; load->const_index[0] = offset; - load->const_index[1] = 1; if (has_indirect) load->src[0] = indirect; @@ -332,7 +331,6 @@ nir_lower_io_block(nir_block *block, void *void_state) offset += intrin->variables[0]->var->data.driver_location; store->const_index[0] = offset; - store->const_index[1] = 1; nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx); diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c index 4bdb80072ab..a57d253975d 100644 --- a/src/glsl/nir/nir_lower_phis_to_scalar.c +++ b/src/glsl/nir/nir_lower_phis_to_scalar.c @@ -153,6 +153,11 @@ should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state) break; } + /* The hash table entry for 'phi' may have changed while recursing the + * dependence graph, so we need to reset it */ + entry = _mesa_hash_table_search(state->phi_table, phi); + assert(entry); + entry->data = (void *)(intptr_t)scalarizable; return scalarizable; diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 6ed5a4cb2b5..9a9cdd16a9a 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -94,34 +94,45 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr case nir_deref_type_array: { nir_deref_array *deref_array = nir_deref_as_array(deref->child); + assert(deref_array->deref_array_type != nir_deref_array_type_wildcard); + + if (deref_array->deref.child) { + ralloc_asprintf_append(&name, "[%u]", + deref_array->deref_array_type == nir_deref_array_type_direct ? + deref_array->base_offset : 0); + } else { + assert(deref->child->type->base_type == GLSL_TYPE_SAMPLER); + instr->sampler_index = deref_array->base_offset; + } + /* XXX: We're assuming here that the indirect is the last array * thing we have. This should be ok for now as we don't support * arrays_of_arrays yet. */ - - instr->sampler_index *= glsl_get_length(deref->type); - switch (deref_array->deref_array_type) { - case nir_deref_array_type_direct: - instr->sampler_index += deref_array->base_offset; - if (deref_array->deref.child) - ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset); - break; - case nir_deref_array_type_indirect: { - add_indirect_to_tex(instr, deref_array->indirect); - nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, - NIR_SRC_INIT); + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + /* First, we have to resize the array of texture sources */ + nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, + instr->num_srcs + 1); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + new_srcs[i].src_type = instr->src[i].src_type; + nir_instr_move_src(&instr->instr, &new_srcs[i].src, + &instr->src[i].src); + } + + ralloc_free(instr->src); + instr->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; + instr->num_srcs++; + nir_instr_move_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + &deref_array->indirect); instr->sampler_array_size = glsl_get_length(deref->type); - - if (deref_array->deref.child) - ralloc_strcat(&name, "[0]"); - break; - } - - case nir_deref_array_type_wildcard: - unreachable("Cannot copy samplers"); - default: - unreachable("Invalid deref array type"); } break; } diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index fa039222fd2..eace791f5b0 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -156,6 +156,8 @@ optimizations = [ (('fpow', a, 2.0), ('fmul', a, a)), (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), (('fpow', 2.0, a), ('fexp2', a)), + (('fpow', ('fpow', a, 2.2), 0.454545), a), + (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)), (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c index b430eac8eab..798506b7595 100644 --- a/src/glsl/nir/nir_opt_peephole_ffma.c +++ b/src/glsl/nir/nir_opt_peephole_ffma.c @@ -73,7 +73,8 @@ are_all_uses_fadd(nir_ssa_def *def) } static nir_alu_instr * -get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) +get_mul_for_src(nir_alu_src *src, int num_components, + uint8_t swizzle[4], bool *negate, bool *abs) { assert(src->src.is_ssa && !src->abs && !src->negate); @@ -85,16 +86,16 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) switch (alu->op) { case nir_op_imov: case nir_op_fmov: - alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); break; case nir_op_fneg: - alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); *negate = !*negate; break; case nir_op_fabs: - alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); *negate = false; *abs = true; break; @@ -115,12 +116,8 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) if (!alu) return NULL; - for (unsigned i = 0; i < 4; i++) { - if (!(alu->dest.write_mask & (1 << i))) - break; - + for (unsigned i = 0; i < num_components; i++) swizzle[i] = swizzle[src->swizzle[i]]; - } return alu; } @@ -160,7 +157,9 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state) negate = false; abs = false; - mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs); + mul = get_mul_for_src(&add->src[add_mul_src], + add->dest.dest.ssa.num_components, + swizzle, &negate, &abs); if (mul != NULL) break; diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c index 82c65bb442f..ef7c9775aa3 100644 --- a/src/glsl/nir/nir_opt_peephole_select.c +++ b/src/glsl/nir/nir_opt_peephole_select.c @@ -86,7 +86,9 @@ block_check_for_allowed_instrs(nir_block *block) nir_alu_instr *mov = nir_instr_as_alu(instr); if (mov->op != nir_op_fmov && mov->op != nir_op_imov && mov->op != nir_op_fneg && mov->op != nir_op_ineg && - mov->op != nir_op_fabs && mov->op != nir_op_iabs) + mov->op != nir_op_fabs && mov->op != nir_op_iabs && + mov->op != nir_op_vec2 && mov->op != nir_op_vec3 && + mov->op != nir_op_vec4) return false; /* Can't handle saturate */ |