-rw-r--r-- | src/compiler/nir/nir_intrinsics.py | 6
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 10
-rw-r--r-- | src/intel/compiler/brw_fs.h | 3
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 181
-rw-r--r-- | src/intel/compiler/brw_nir.h | 5
-rw-r--r-- | src/intel/compiler/brw_nir_lower_image_load_store.c | 41
-rw-r--r-- | src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 159
-rw-r--r-- | src/intel/vulkan/anv_pipeline.c | 4
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 130
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 1
10 files changed, 371 insertions, 169 deletions
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index d7184dadbbc..b06b38fc2ce 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -331,9 +331,9 @@ image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
 # variable. The const index specifies which of the six parameters to load.
 intrinsic("image_deref_load_param_intel", src_comp=[1], dest_comp=0,
           indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
-intrinsic("image_deref_load_raw_intel", src_comp=[1, 1], dest_comp=0,
-          flags=[CAN_ELIMINATE])
-intrinsic("image_deref_store_raw_intel", src_comp=[1, 1, 0])
+image("load_raw_intel", src_comp=[1], dest_comp=0,
+      flags=[CAN_ELIMINATE])
+image("store_raw_intel", src_comp=[1, 0])
 
 # Vulkan descriptor set intrinsics
 #
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 58736503f9a..02a7a33c4d7 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -494,16 +494,14 @@ type_size_scalar(const struct glsl_type *type)
       }
       return size;
    case GLSL_TYPE_SAMPLER:
-      /* Samplers take up no register space, since they're baked in at
-       * link time.
-       */
-      return 0;
    case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_IMAGE:
+      /* Samplers, atomics, and images take up no register space, since
+       * they're baked in at link time.
+       */
       return 0;
    case GLSL_TYPE_SUBROUTINE:
       return 1;
-   case GLSL_TYPE_IMAGE:
-      return BRW_IMAGE_PARAM_SIZE;
    case GLSL_TYPE_VOID:
    case GLSL_TYPE_ERROR:
    case GLSL_TYPE_INTERFACE:
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 52220db2dc0..aba19d5ab2c 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -216,6 +216,8 @@ public:
                               nir_intrinsic_instr *instr);
    void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
                               nir_intrinsic_instr *instr);
+   fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
+                                        nir_intrinsic_instr *instr);
    void nir_emit_intrinsic(const brw::fs_builder &bld,
                            nir_intrinsic_instr *instr);
    void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
@@ -235,7 +237,6 @@ public:
    fs_reg get_nir_src(const nir_src &src);
    fs_reg get_nir_src_imm(const nir_src &src);
    fs_reg get_nir_dest(const nir_dest &dest);
-   fs_reg get_nir_image_deref(nir_deref_instr *deref);
    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
                      unsigned wr_mask);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index b2be91f9117..aaba0e2a693 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1694,70 +1694,6 @@ fs_visitor::get_nir_dest(const nir_dest &dest)
    }
 }
 
-fs_reg
-fs_visitor::get_nir_image_deref(nir_deref_instr *deref)
-{
-   fs_reg arr_offset = brw_imm_ud(0);
-   unsigned array_size = BRW_IMAGE_PARAM_SIZE * 4;
-   nir_deref_instr *head = deref;
-   while (head->deref_type != nir_deref_type_var) {
-      assert(head->deref_type == nir_deref_type_array);
-
-      /* This level's element size is the previous level's array size */
-      const unsigned elem_size = array_size;
-
-      fs_reg index = retype(get_nir_src_imm(head->arr.index),
-                            BRW_REGISTER_TYPE_UD);
-      if (arr_offset.file == BRW_IMMEDIATE_VALUE &&
-          index.file == BRW_IMMEDIATE_VALUE) {
-         arr_offset.ud += index.ud * elem_size;
-      } else if (index.file == BRW_IMMEDIATE_VALUE) {
-         bld.ADD(arr_offset, arr_offset, brw_imm_ud(index.ud * elem_size));
-      } else {
-         fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
-         bld.MUL(tmp, index, brw_imm_ud(elem_size));
-         bld.ADD(tmp, tmp, arr_offset);
-         arr_offset = tmp;
-      }
-
-      head = nir_deref_instr_parent(head);
-      assert(glsl_type_is_array(head->type));
-      array_size = elem_size * glsl_get_length(head->type);
-   }
-
-   assert(head->deref_type == nir_deref_type_var);
-   const unsigned max_arr_offset = array_size - (BRW_IMAGE_PARAM_SIZE * 4);
-   fs_reg image(UNIFORM, head->var->data.driver_location / 4,
-                BRW_REGISTER_TYPE_UD);
-
-   if (arr_offset.file == BRW_IMMEDIATE_VALUE) {
-      /* The offset is in bytes but we want it in dwords */
-      return offset(image, bld, MIN2(arr_offset.ud, max_arr_offset) / 4);
-   } else {
-      /* Accessing an invalid surface index with the dataport can result
-       * in a hang. According to the spec "if the index used to
-       * select an individual element is negative or greater than or
-       * equal to the size of the array, the results of the operation
-       * are undefined but may not lead to termination" -- which is one
-       * of the possible outcomes of the hang. Clamp the index to
-       * prevent access outside of the array bounds.
-       */
-      bld.emit_minmax(arr_offset, arr_offset, brw_imm_ud(max_arr_offset),
-                      BRW_CONDITIONAL_L);
-
-      /* Emit a pile of MOVs to load the uniform into a temporary. The
-       * dead-code elimination pass will get rid of what we don't use.
-       */
-      fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE);
-      for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) {
-         bld.emit(SHADER_OPCODE_MOV_INDIRECT,
-                  offset(tmp, bld, j), offset(image, bld, j),
-                  arr_offset, brw_imm_ud(max_arr_offset + 4));
-      }
-      return tmp;
-   }
-}
-
 void
 fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
                          unsigned wr_mask)
@@ -3847,6 +3783,43 @@ brw_cond_mod_for_nir_reduction_op(nir_op op)
    }
 }
 
+fs_reg
+fs_visitor::get_nir_image_intrinsic_image(const brw::fs_builder &bld,
+                                          nir_intrinsic_instr *instr)
+{
+   fs_reg image = retype(get_nir_src_imm(instr->src[0]), BRW_REGISTER_TYPE_UD);
+
+   if (stage_prog_data->binding_table.image_start > 0) {
+      if (image.file == BRW_IMMEDIATE_VALUE) {
+         image.d += stage_prog_data->binding_table.image_start;
+      } else {
+         bld.ADD(image, image,
+                 brw_imm_d(stage_prog_data->binding_table.image_start));
+      }
+   }
+
+   return bld.emit_uniformize(image);
+}
+
+static unsigned
+image_intrinsic_coord_components(nir_intrinsic_instr *instr)
+{
+   switch (nir_intrinsic_image_dim(instr)) {
+   case GLSL_SAMPLER_DIM_1D:
+      return 1 + nir_intrinsic_image_array(instr);
+   case GLSL_SAMPLER_DIM_2D:
+   case GLSL_SAMPLER_DIM_RECT:
+      return 2 + nir_intrinsic_image_array(instr);
+   case GLSL_SAMPLER_DIM_3D:
+   case GLSL_SAMPLER_DIM_CUBE:
+      return 3;
+   case GLSL_SAMPLER_DIM_BUF:
+      return 1;
+   default:
+      unreachable("Invalid image dimension");
+   }
+}
+
 void
 fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
 {
@@ -3855,40 +3828,37 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       dest = get_nir_dest(instr->dest);
 
    switch (instr->intrinsic) {
-   case nir_intrinsic_image_deref_load:
-   case nir_intrinsic_image_deref_store:
-   case nir_intrinsic_image_deref_atomic_add:
-   case nir_intrinsic_image_deref_atomic_min:
-   case nir_intrinsic_image_deref_atomic_max:
-   case nir_intrinsic_image_deref_atomic_and:
-   case nir_intrinsic_image_deref_atomic_or:
-   case nir_intrinsic_image_deref_atomic_xor:
-   case nir_intrinsic_image_deref_atomic_exchange:
-   case nir_intrinsic_image_deref_atomic_comp_swap: {
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_store:
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_min:
+   case nir_intrinsic_image_atomic_max:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_comp_swap: {
      if (stage == MESA_SHADER_FRAGMENT &&
-          instr->intrinsic != nir_intrinsic_image_deref_load)
+          instr->intrinsic != nir_intrinsic_image_load)
         brw_wm_prog_data(prog_data)->has_side_effects = true;
 
-      /* Get the referenced image variable and type. */
-      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
-      const glsl_type *type = deref->type;
-
      /* Get some metadata from the image intrinsic. */
      const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
-      const unsigned dims = type->coordinate_components();
+      const unsigned dims = image_intrinsic_coord_components(instr);
+      const GLenum format = nir_intrinsic_format(instr);
      const unsigned dest_components = nir_intrinsic_dest_components(instr);
 
      /* Get the arguments of the image intrinsic. */
-      const fs_reg image = get_nir_image_deref(deref);
+      const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
      const fs_reg coords = retype(get_nir_src(instr->src[1]),
                                   BRW_REGISTER_TYPE_UD);
      fs_reg tmp;
 
      /* Emit an image load, store or atomic op. */
-      if (instr->intrinsic == nir_intrinsic_image_deref_load) {
+      if (instr->intrinsic == nir_intrinsic_image_load) {
         tmp = emit_typed_read(bld, image, coords, dims,
                               instr->num_components);
-      } else if (instr->intrinsic == nir_intrinsic_image_deref_store) {
+      } else if (instr->intrinsic == nir_intrinsic_image_store) {
         const fs_reg src0 = get_nir_src(instr->src[3]);
         emit_typed_write(bld, image, coords, src0, dims,
                          instr->num_components);
@@ -3897,7 +3867,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
         unsigned num_srcs = info->num_srcs;
 
         switch (instr->intrinsic) {
-         case nir_intrinsic_image_deref_atomic_add:
+         case nir_intrinsic_image_atomic_add:
            assert(num_srcs == 4);
 
            op = get_op_for_atomic_add(instr, 3);
@@ -3905,27 +3875,27 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
            if (op != BRW_AOP_ADD)
               num_srcs = 3;
            break;
-         case nir_intrinsic_image_deref_atomic_min:
-            op = (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
-                  BRW_AOP_IMIN : BRW_AOP_UMIN);
+         case nir_intrinsic_image_atomic_min:
+            assert(format == GL_R32UI || format == GL_R32I);
+            op = (format == GL_R32I) ? BRW_AOP_IMIN : BRW_AOP_UMIN;
            break;
-         case nir_intrinsic_image_deref_atomic_max:
-            op = (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
-                  BRW_AOP_IMAX : BRW_AOP_UMAX);
+         case nir_intrinsic_image_atomic_max:
+            assert(format == GL_R32UI || format == GL_R32I);
+            op = (format == GL_R32I) ? BRW_AOP_IMAX : BRW_AOP_UMAX;
            break;
-         case nir_intrinsic_image_deref_atomic_and:
+         case nir_intrinsic_image_atomic_and:
            op = BRW_AOP_AND;
            break;
-         case nir_intrinsic_image_deref_atomic_or:
+         case nir_intrinsic_image_atomic_or:
            op = BRW_AOP_OR;
            break;
-         case nir_intrinsic_image_deref_atomic_xor:
+         case nir_intrinsic_image_atomic_xor:
            op = BRW_AOP_XOR;
            break;
-         case nir_intrinsic_image_deref_atomic_exchange:
+         case nir_intrinsic_image_atomic_exchange:
            op = BRW_AOP_MOV;
            break;
-         case nir_intrinsic_image_deref_atomic_comp_swap:
+         case nir_intrinsic_image_atomic_comp_swap:
            op = BRW_AOP_CMPWR;
            break;
         default:
@@ -3948,19 +3918,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
      break;
   }
 
-   case nir_intrinsic_image_deref_load_param_intel: {
-      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
-      const fs_reg image = get_nir_image_deref(deref);
-      const fs_reg param = offset(image, bld, nir_intrinsic_base(instr) * 4);
-      for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) {
-         bld.MOV(offset(retype(dest, param.type), bld, c),
-                 offset(param, bld, c));
-      }
-      break;
-   }
-
-   case nir_intrinsic_image_deref_load_raw_intel: {
-      const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0]));
+   case nir_intrinsic_image_load_raw_intel: {
+      const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
      const fs_reg addr = retype(get_nir_src(instr->src[1]),
                                 BRW_REGISTER_TYPE_UD);
 
@@ -3974,8 +3933,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
      break;
   }
 
-   case nir_intrinsic_image_deref_store_raw_intel: {
-      const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0]));
+   case nir_intrinsic_image_store_raw_intel: {
+      const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
      const fs_reg addr = retype(get_nir_src(instr->src[1]),
                                 BRW_REGISTER_TYPE_UD);
      const fs_reg data = retype(get_nir_src(instr->src[2]),
@@ -4010,7 +3969,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
      break;
   }
 
-   case nir_intrinsic_image_deref_samples:
+   case nir_intrinsic_image_samples:
      /* The driver does not support multi-sampled images. */
      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1));
      break;
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 72a6ee8884a..50073265539 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -116,6 +116,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir);
 
 bool brw_nir_lower_image_load_store(nir_shader *nir,
                                     const struct gen_device_info *devinfo);
+void brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+                                     nir_ssa_def *index);
 
 nir_shader *brw_postprocess_nir(nir_shader *nir,
                                 const struct brw_compiler *compiler,
@@ -147,6 +149,9 @@ void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
                                 struct gl_program *prog,
                                 struct brw_stage_prog_data *stage_prog_data);
 
+void brw_nir_lower_glsl_images(nir_shader *shader,
+                               const struct gl_program *prog);
+
 void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
                                 nir_shader *nir,
                                 const struct brw_vs_prog_key *vs_key,
diff --git a/src/intel/compiler/brw_nir_lower_image_load_store.c b/src/intel/compiler/brw_nir_lower_image_load_store.c
index 819fb440f2c..5eba9ddabd3 100644
--- a/src/intel/compiler/brw_nir_lower_image_load_store.c
+++ b/src/intel/compiler/brw_nir_lower_image_load_store.c
@@ -811,3 +811,44 @@ brw_nir_lower_image_load_store(nir_shader *shader,
 
    return progress;
 }
+
+void
+brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+                                nir_ssa_def *index)
+{
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   switch (intrin->intrinsic) {
+#define CASE(op) \
+   case nir_intrinsic_image_deref_##op: \
+      intrin->intrinsic = nir_intrinsic_image_##op; \
+      break;
+   CASE(load)
+   CASE(store)
+   CASE(atomic_add)
+   CASE(atomic_min)
+   CASE(atomic_max)
+   CASE(atomic_and)
+   CASE(atomic_or)
+   CASE(atomic_xor)
+   CASE(atomic_exchange)
+   CASE(atomic_comp_swap)
+   CASE(atomic_fadd)
+   CASE(size)
+   CASE(samples)
+   CASE(load_raw_intel)
+   CASE(store_raw_intel)
+#undef CASE
+   default:
+      unreachable("Unhanded image intrinsic");
+   }
+
+   nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
+   nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
+   nir_intrinsic_set_access(intrin, var->data.image.access);
+   nir_intrinsic_set_format(intrin, var->data.image.format);
+
+   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+                         nir_src_for_ssa(index));
+}
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 84a664826e8..583b5a17cc6 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -24,6 +24,7 @@
 #include "anv_nir.h"
 #include "program/prog_parameter.h"
 #include "nir/nir_builder.h"
+#include "compiler/brw_nir.h"
 
 struct apply_pipeline_layout_state {
    nir_shader *shader;
@@ -32,6 +33,8 @@ struct apply_pipeline_layout_state {
    struct anv_pipeline_layout *layout;
    bool add_bounds_checks;
 
+   unsigned first_image_uniform;
+
    bool uses_constants;
    uint8_t constants_offset;
    struct {
@@ -99,6 +102,9 @@ get_used_bindings_block(nir_block *block,
      case nir_intrinsic_image_deref_atomic_comp_swap:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples:
+      case nir_intrinsic_image_deref_load_param_intel:
+      case nir_intrinsic_image_deref_load_raw_intel:
+      case nir_intrinsic_image_deref_store_raw_intel:
         add_deref_src_binding(state, intrin->src[0]);
         break;
 
@@ -179,6 +185,63 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
 }
 
 static void
+lower_image_intrinsic(nir_intrinsic_instr *intrin,
+                      struct apply_pipeline_layout_state *state)
+{
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   unsigned set = var->data.descriptor_set;
+   unsigned binding = var->data.binding;
+   unsigned array_size =
+      state->layout->set[set].layout->binding[binding].array_size;
+
+   nir_builder *b = &state->builder;
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_ssa_def *index = NULL;
+   if (deref->deref_type != nir_deref_type_var) {
+      assert(deref->deref_type == nir_deref_type_array);
+      index = nir_ssa_for_src(b, deref->arr.index, 1);
+      if (state->add_bounds_checks)
+         index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
+   } else {
+      index = nir_imm_int(b, 0);
+   }
+
+   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
+      b->cursor = nir_instr_remove(&intrin->instr);
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+
+      nir_intrinsic_set_base(load, state->first_image_uniform +
+                                   state->set[set].image_offsets[binding] *
+                                   BRW_IMAGE_PARAM_SIZE * 4);
+      nir_intrinsic_set_range(load, array_size * BRW_IMAGE_PARAM_SIZE * 4);
+
+      const unsigned param = nir_intrinsic_base(intrin);
+      nir_ssa_def *offset =
+         nir_imul(b, index, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4));
+      offset = nir_iadd(b, offset, nir_imm_int(b, param * 16));
+      load->src[0] = nir_src_for_ssa(offset);
+
+      load->num_components = intrin->dest.ssa.num_components;
+      nir_ssa_dest_init(&load->instr, &load->dest,
+                        intrin->dest.ssa.num_components,
+                        intrin->dest.ssa.bit_size, NULL);
+      nir_builder_instr_insert(b, &load->instr);
+
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                               nir_src_for_ssa(&load->dest.ssa));
+   } else {
+      unsigned binding_offset = state->set[set].surface_offsets[binding];
+      index = nir_iadd(b, index, nir_imm_int(b, binding_offset));
+      brw_nir_rewrite_image_intrinsic(intrin, index);
+   }
+}
+
+static void
 lower_load_constant(nir_intrinsic_instr *intrin,
                     struct apply_pipeline_layout_state *state)
 {
@@ -318,6 +381,23 @@ apply_pipeline_layout_block(nir_block *block,
      case nir_intrinsic_vulkan_resource_reindex:
         lower_res_reindex_intrinsic(intrin, state);
         break;
+      case nir_intrinsic_image_deref_load:
+      case nir_intrinsic_image_deref_store:
+      case nir_intrinsic_image_deref_atomic_add:
+      case nir_intrinsic_image_deref_atomic_min:
+      case nir_intrinsic_image_deref_atomic_max:
+      case nir_intrinsic_image_deref_atomic_and:
+      case nir_intrinsic_image_deref_atomic_or:
+      case nir_intrinsic_image_deref_atomic_xor:
+      case nir_intrinsic_image_deref_atomic_exchange:
+      case nir_intrinsic_image_deref_atomic_comp_swap:
+      case nir_intrinsic_image_deref_size:
+      case nir_intrinsic_image_deref_samples:
+      case nir_intrinsic_image_deref_load_param_intel:
+      case nir_intrinsic_image_deref_load_raw_intel:
+      case nir_intrinsic_image_deref_store_raw_intel:
+         lower_image_intrinsic(intrin, state);
+         break;
      case nir_intrinsic_load_constant:
         lower_load_constant(intrin, state);
         break;
@@ -436,6 +516,39 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
      }
   }
 
+   unsigned image_uniform;
+   if (map->image_count > 0) {
+      assert(map->image_count <= MAX_IMAGES);
+      assert(shader->num_uniforms == prog_data->nr_params * 4);
+      state.first_image_uniform = shader->num_uniforms;
+      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
+                                                       map->image_count *
+                                                       BRW_IMAGE_PARAM_SIZE);
+      struct anv_push_constants *null_data = NULL;
+      const struct brw_image_param *image_param = null_data->images;
+      for (uint32_t i = 0; i < map->image_count; i++) {
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+                                  (uintptr_t)&image_param->surface_idx, 1);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+                                  (uintptr_t)image_param->offset, 2);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+                                  (uintptr_t)image_param->size, 3);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+                                  (uintptr_t)image_param->stride, 4);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+                                  (uintptr_t)image_param->tiling, 3);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+                                  (uintptr_t)image_param->swizzling, 2);
+
+         param += BRW_IMAGE_PARAM_SIZE;
+         image_param ++;
+      }
+      assert(param == prog_data->param + prog_data->nr_params);
+
+      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
+      assert(shader->num_uniforms == prog_data->nr_params * 4);
+   }
+
   nir_foreach_variable(var, &shader->uniforms) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);
 
@@ -479,51 +592,5 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
                                 nir_metadata_dominance);
   }
 
-   if (map->image_count > 0) {
-      assert(map->image_count <= MAX_IMAGES);
-      nir_foreach_variable(var, &shader->uniforms) {
-         if (glsl_type_is_image(var->type) ||
-             (glsl_type_is_array(var->type) &&
-              glsl_type_is_image(glsl_get_array_element(var->type)))) {
-            /* Images are represented as uniform push constants and the actual
-             * information required for reading/writing to/from the image is
-             * storred in the uniform.
-             */
-            unsigned set = var->data.descriptor_set;
-            unsigned binding = var->data.binding;
-            unsigned image_index = state.set[set].image_offsets[binding];
-
-            var->data.driver_location = shader->num_uniforms +
-                                        image_index * BRW_IMAGE_PARAM_SIZE * 4;
-         }
-      }
-
-      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
-                                                       map->image_count *
-                                                       BRW_IMAGE_PARAM_SIZE);
-      struct anv_push_constants *null_data = NULL;
-      const struct brw_image_param *image_param = null_data->images;
-      for (uint32_t i = 0; i < map->image_count; i++) {
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
-                                  (uintptr_t)&image_param->surface_idx, 1);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
-                                  (uintptr_t)image_param->offset, 2);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
-                                  (uintptr_t)image_param->size, 3);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
-                                  (uintptr_t)image_param->stride, 4);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
-                                  (uintptr_t)image_param->tiling, 3);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
-                                  (uintptr_t)image_param->swizzling, 2);
-
-         param += BRW_IMAGE_PARAM_SIZE;
-         image_param ++;
-      }
-      assert(param == prog_data->param + prog_data->nr_params);
-
-      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
-   }
-
   ralloc_free(mem_ctx);
 }
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 19d59b7fbac..a3eb68769a2 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -523,6 +523,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
       pipeline->needs_data_cache = true;
 
+   NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
+
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
    if (layout) {
       anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,
@@ -532,8 +534,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    if (nir->info.stage != MESA_SHADER_COMPUTE)
       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
 
-   NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
-
    assert(nir->num_uniforms == prog_data->nr_params * 4);
 
    stage->nir = nir;
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index 54f9f9b1a6b..8a560d9bac1 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -23,6 +23,7 @@
 
 #include "compiler/brw_nir.h"
 #include "compiler/glsl/ir_uniform.h"
+#include "compiler/nir/nir_builder.h"
 #include "brw_program.h"
 
 static void
@@ -267,3 +268,132 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
         stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
   }
 }
+
+static nir_ssa_def *
+get_aoa_deref_offset(nir_builder *b,
+                     nir_deref_instr *deref,
+                     unsigned elem_size)
+{
+   unsigned array_size = elem_size;
+   nir_ssa_def *offset = nir_imm_int(b, 0);
+
+   while (deref->deref_type != nir_deref_type_var) {
+      assert(deref->deref_type == nir_deref_type_array);
+
+      /* This level's element size is the previous level's array size */
+      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
+      assert(deref->arr.index.ssa);
+      offset = nir_iadd(b, offset,
+                        nir_imul(b, index, nir_imm_int(b, array_size)));
+
+      deref = nir_deref_instr_parent(deref);
+      assert(glsl_type_is_array(deref->type));
+      array_size *= glsl_get_length(deref->type);
+   }
+
+   /* Accessing an invalid surface index with the dataport can result in a
+    * hang. According to the spec "if the index used to select an individual
+    * element is negative or greater than or equal to the size of the array,
+    * the results of the operation are undefined but may not lead to
+    * termination" -- which is one of the possible outcomes of the hang.
+    * Clamp the index to prevent access outside of the array bounds.
+    */
+   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
+}
+
+void
+brw_nir_lower_glsl_images(nir_shader *shader,
+                          const struct gl_program *prog)
+{
+   /* We put image uniforms at the end */
+   nir_foreach_variable(var, &shader->uniforms) {
+      if (!var->type->contains_image())
+         continue;
+
+      /* GL Only allows arrays of arrays of images */
+      assert(var->type->without_array()->is_image());
+      const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
+
+      var->data.driver_location = shader->num_uniforms;
+      shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
+   }
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_image_deref_load:
+         case nir_intrinsic_image_deref_store:
+         case nir_intrinsic_image_deref_atomic_add:
+         case nir_intrinsic_image_deref_atomic_min:
+         case nir_intrinsic_image_deref_atomic_max:
+         case nir_intrinsic_image_deref_atomic_and:
+         case nir_intrinsic_image_deref_atomic_or:
+         case nir_intrinsic_image_deref_atomic_xor:
+         case nir_intrinsic_image_deref_atomic_exchange:
+         case nir_intrinsic_image_deref_atomic_comp_swap:
+         case nir_intrinsic_image_deref_size:
+         case nir_intrinsic_image_deref_samples:
+         case nir_intrinsic_image_deref_load_raw_intel:
+         case nir_intrinsic_image_deref_store_raw_intel: {
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            nir_variable *var = nir_deref_instr_get_variable(deref);
+            const unsigned num_images =
+               MAX2(1, var->type->arrays_of_arrays_size());
+
+            struct gl_uniform_storage *storage =
+               &prog->sh.data->UniformStorage[var->data.location];
+            const unsigned image_var_idx =
+               storage->opaque[shader->info.stage].index;
+
+            b.cursor = nir_before_instr(&intrin->instr);
+            nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
+                                          get_aoa_deref_offset(&b, deref, 1));
+            brw_nir_rewrite_image_intrinsic(intrin, index);
+            break;
+         }
+
+         case nir_intrinsic_image_deref_load_param_intel: {
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            nir_variable *var = nir_deref_instr_get_variable(deref);
+            const unsigned num_images =
+               MAX2(1, var->type->arrays_of_arrays_size());
+
+            b.cursor = nir_instr_remove(&intrin->instr);
+
+            const unsigned param = nir_intrinsic_base(intrin);
+            nir_ssa_def *offset =
+               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
+            offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));
+
+            nir_intrinsic_instr *load =
+               nir_intrinsic_instr_create(b.shader,
+                                          nir_intrinsic_load_uniform);
+            nir_intrinsic_set_base(load, var->data.driver_location);
+            nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
+            load->src[0] = nir_src_for_ssa(offset);
+            load->num_components = intrin->dest.ssa.num_components;
+            nir_ssa_dest_init(&load->instr, &load->dest,
+                              intrin->dest.ssa.num_components,
+                              intrin->dest.ssa.bit_size, NULL);
+            nir_builder_instr_insert(&b, &load->instr);
+
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&load->dest.ssa));
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index f5ebd3c3b05..041395ec4c0 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -140,6 +140,7 @@ brw_create_nir(struct brw_context *brw,
    }
 
    NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
+   NIR_PASS_V(nir, brw_nir_lower_glsl_images, prog);
 
    return nir;
 }