-rw-r--r--   src/intel/vulkan/anv_cmd_buffer.c                  |  37
-rw-r--r--   src/intel/vulkan/anv_descriptor_set.c              |  37
-rw-r--r--   src/intel/vulkan/anv_device.c                      |  14
-rw-r--r--   src/intel/vulkan/anv_nir_apply_pipeline_layout.c   | 242
-rw-r--r--   src/intel/vulkan/anv_pipeline.c                    |  32
-rw-r--r--   src/intel/vulkan/anv_private.h                     |  20

6 files changed, 347 insertions, 35 deletions
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 1b34644a434..981c071fc23 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -594,6 +594,14 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
          *dynamic_offsets += set_layout->dynamic_offset_count;
          *dynamic_offset_count -= set_layout->dynamic_offset_count;
+
+         if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) {
+            cmd_buffer->state.push_constants_dirty |=
+               VK_SHADER_STAGE_COMPUTE_BIT;
+         } else {
+            cmd_buffer->state.push_constants_dirty |=
+               VK_SHADER_STAGE_ALL_GRAPHICS;
+         }
       }
    }
 
@@ -739,7 +747,8 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
 }
 
 static uint32_t
-anv_push_constant_value(struct anv_push_constants *data, uint32_t param)
+anv_push_constant_value(const struct anv_cmd_pipeline_state *state,
+                        const struct anv_push_constants *data, uint32_t param)
 {
    if (BRW_PARAM_IS_BUILTIN(param)) {
       switch (param) {
@@ -754,20 +763,28 @@ anv_push_constant_value(struct anv_push_constants *data, uint32_t param)
       default:
          unreachable("Invalid param builtin");
       }
-   } else {
+   } else if (ANV_PARAM_IS_PUSH(param)) {
       uint32_t offset = ANV_PARAM_PUSH_OFFSET(param);
       assert(offset % sizeof(uint32_t) == 0);
       if (offset < data->size)
          return *(uint32_t *)((uint8_t *)data + offset);
       else
         return 0;
+   } else if (ANV_PARAM_IS_DYN_OFFSET(param)) {
+      unsigned idx = ANV_PARAM_DYN_OFFSET_IDX(param);
+      assert(idx < MAX_DYNAMIC_BUFFERS);
+      return state->dynamic_offsets[idx];
    }
+
+   assert(!"Invalid param");
+   return 0;
 }
 
 struct anv_state
 anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
                               gl_shader_stage stage)
 {
+   struct anv_cmd_pipeline_state *pipeline_state = &cmd_buffer->state.gfx.base;
    struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
 
    /* If we don't have this stage, bail. */
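The new else-if chain dispatches on a tag stored in the top 16 bits of each param slot; the ANV_PARAM_* macros that implement it appear in the anv_private.h hunk at the end of this patch. A minimal, runnable sketch of the scheme (illustrative, not driver code):

   #include <assert.h>
   #include <stdint.h>

   /* Tag layout used by the ANV_PARAM_* macros (added in anv_private.h below):
    *   bits 31:16 == 1 -> push constant, byte offset in bits 15:0
    *   bits 31:16 == 2 -> dynamic offset, table index in bits 15:0
    * BRW_PARAM_IS_BUILTIN() values are matched before either of these. */
   int main(void)
   {
      uint32_t param = (2u << 16) | 5;  /* same bits as ANV_PARAM_DYN_OFFSET(5) */
      assert(param >> 16 == 2);         /* ANV_PARAM_IS_DYN_OFFSET(param) */
      assert((param & 0xffff) == 5);    /* ANV_PARAM_DYN_OFFSET_IDX(param) */
      return 0;
   }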
@@ -780,7 +797,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
       pipeline->shaders[stage]->prog_data;
 
    /* If we don't actually have any push constants, bail. */
-   if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
+   if (prog_data == NULL || prog_data->nr_params == 0)
       return (struct anv_state) { .offset = 0 };
 
    struct anv_state state =
@@ -790,8 +807,10 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
 
    /* Walk through the param array and fill the buffer with data */
    uint32_t *u32_map = state.map;
-   for (unsigned i = 0; i < prog_data->nr_params; i++)
-      u32_map[i] = anv_push_constant_value(data, prog_data->param[i]);
+   for (unsigned i = 0; i < prog_data->nr_params; i++) {
+      u32_map[i] = anv_push_constant_value(pipeline_state, data,
+                                           prog_data->param[i]);
+   }
 
    return state;
 }
@@ -799,6 +818,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
 struct anv_state
 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
 {
+   struct anv_cmd_pipeline_state *pipeline_state = &cmd_buffer->state.compute.base;
    struct anv_push_constants *data =
       cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE];
    struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
@@ -826,7 +846,8 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
            i < cs_prog_data->push.cross_thread.dwords;
            i++) {
          assert(prog_data->param[i] != BRW_PARAM_BUILTIN_SUBGROUP_ID);
-         u32_map[i] = anv_push_constant_value(data, prog_data->param[i]);
+         u32_map[i] = anv_push_constant_value(pipeline_state, data,
+                                              prog_data->param[i]);
       }
    }
 
@@ -840,8 +861,8 @@
          if (prog_data->param[src] == BRW_PARAM_BUILTIN_SUBGROUP_ID) {
             u32_map[dst] = t;
          } else {
-            u32_map[dst] =
-               anv_push_constant_value(data, prog_data->param[src]);
+            u32_map[dst] = anv_push_constant_value(pipeline_state, data,
+                                                   prog_data->param[src]);
          }
       }
    }
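Together with the descriptor-set and NIR-pass changes below, this is the CPU half of the dynamic-offset story: offsets recorded at vkCmdBindDescriptorSets time are replayed as ordinary uniform dwords when push constants are emitted. A hypothetical end-to-end walkthrough (runnable; the macro definitions mirror the anv_private.h hunk, and MAX_DYNAMIC_BUFFERS is a stand-in for the real limit there):

   #include <assert.h>
   #include <stdint.h>

   #define MAX_DYNAMIC_BUFFERS 16   /* stand-in; real value in anv_private.h */
   #define ANV_PARAM_DYN_OFFSET(o)        ((2u << 16) | (uint32_t)(o))
   #define ANV_PARAM_IS_DYN_OFFSET(p)     ((uint32_t)(p) >> 16 == 2)
   #define ANV_PARAM_DYN_OFFSET_IDX(p)    ((p) & 0xffff)

   int main(void)
   {
      /* vkCmdBindDescriptorSets stashes offsets in the command-buffer state
       * (state->dynamic_offsets in the hunk above): */
      uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS] = {0};
      dynamic_offsets[2] = 256;

      /* The NIR pass (later in this patch) registers one param per slot: */
      uint32_t param = ANV_PARAM_DYN_OFFSET(2);

      /* anv_push_constant_value() then resolves it at record time: */
      assert(ANV_PARAM_IS_DYN_OFFSET(param));
      assert(dynamic_offsets[ANV_PARAM_DYN_OFFSET_IDX(param)] == 256);
      return 0;
   }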
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index 90a02997a8d..85915cfb9d0 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -84,6 +84,14 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
       unreachable("Unsupported descriptor type");
    }
 
+   /* On gen8 and above when we have softpin enabled, we also need to push
+    * SSBO address ranges so that we can use A64 messages in the shader.
+    */
+   if (device->has_a64_buffer_access &&
+       (type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+        type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC))
+      data |= ANV_DESCRIPTOR_ADDRESS_RANGE;
+
    return data;
 }
 
@@ -95,6 +103,9 @@ anv_descriptor_data_size(enum anv_descriptor_data data)
    if (data & ANV_DESCRIPTOR_IMAGE_PARAM)
       size += BRW_IMAGE_PARAM_SIZE * 4;
 
+   if (data & ANV_DESCRIPTOR_ADDRESS_RANGE)
+      size += sizeof(struct anv_address_range_descriptor);
+
    return size;
 }
 
@@ -130,6 +141,11 @@ anv_descriptor_data_supports_bindless(const struct anv_physical_device *pdevice,
                                       enum anv_descriptor_data data,
                                       bool sampler)
 {
+   if (data & ANV_DESCRIPTOR_ADDRESS_RANGE) {
+      assert(pdevice->has_a64_buffer_access);
+      return true;
+   }
+
    return false;
 }
 
@@ -1077,6 +1093,9 @@ anv_descriptor_set_write_buffer(struct anv_device *device,
 
    assert(type == bind_layout->type);
 
+   struct anv_address bind_addr = anv_address_add(buffer->address, offset);
+   uint64_t bind_range = anv_buffer_get_range(buffer, offset, range);
+
    if (type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
        type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
       *desc = (struct anv_descriptor) {
@@ -1091,8 +1110,8 @@ anv_descriptor_set_write_buffer(struct anv_device *device,
         &set->buffer_views[bind_layout->buffer_view_index + element];
 
      bview->format = anv_isl_format_for_descriptor_type(type);
-     bview->range = anv_buffer_get_range(buffer, offset, range);
-     bview->address = anv_address_add(buffer->address, offset);
+     bview->range = bind_range;
+     bview->address = bind_addr;
 
      /* If we're writing descriptors through a push command, we need to
       * allocate the surface state from the command buffer. Otherwise it will
@@ -1102,14 +1121,24 @@ anv_descriptor_set_write_buffer(struct anv_device *device,
         bview->surface_state = anv_state_stream_alloc(alloc_stream, 64, 64);
 
      anv_fill_buffer_surface_state(device, bview->surface_state,
-                                   bview->format,
-                                   bview->address, bview->range, 1);
+                                   bview->format, bind_addr, bind_range, 1);
 
      *desc = (struct anv_descriptor) {
         .type = type,
         .buffer_view = bview,
      };
   }
+
+   void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
+                    element * anv_descriptor_size(bind_layout);
+
+   if (bind_layout->data & ANV_DESCRIPTOR_ADDRESS_RANGE) {
+      struct anv_address_range_descriptor desc = {
+         .address = anv_address_physical(bind_addr),
+         .range = bind_range,
+      };
+      memcpy(desc_map, &desc, sizeof(desc));
+   }
 }
 
 void
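The new tail of anv_descriptor_set_write_buffer() is what makes SSBOs usable without a binding table: besides the legacy surface state, it writes a 16-byte address/range record into the set's descriptor buffer. A self-contained sketch of that write (the struct duplicates the one added in anv_private.h below; the helper name and parameters are hypothetical):

   #include <stdint.h>
   #include <string.h>

   /* Layout added in anv_private.h at the end of this patch. */
   struct anv_address_range_descriptor {
      uint64_t address;   /* 64-bit GPU address of buffer + bind offset */
      uint32_t range;     /* bytes available, used for bounds checking */
      uint32_t zero;      /* the NIR "offset" component; must stay 0 */
   };

   /* Hypothetical helper: desc_map points into the set's descriptor buffer,
    * gpu_addr and range are the values computed in the hunk above. */
   static void write_address_range(void *desc_map, uint64_t gpu_addr,
                                   uint32_t range)
   {
      struct anv_address_range_descriptor d = {
         .address = gpu_addr,   /* anv_address_physical(bind_addr) */
         .range = range,        /* anv_buffer_get_range(buffer, offset, range) */
      };
      memcpy(desc_map, &d, sizeof(d));   /* 16 bytes per array element */
   }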
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 8c60b917050..de56926d935 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -278,6 +278,8 @@ anv_physical_device_init_uuids(struct anv_physical_device *device)
                      sizeof(device->chipset_id));
    _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
                      sizeof(device->always_use_bindless));
+   _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
+                     sizeof(device->has_a64_buffer_access));
    _mesa_sha1_final(&sha1_ctx, sha1);
    memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
 
@@ -1103,9 +1105,15 @@ void anv_GetPhysicalDeviceProperties(
    const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
                                       (1ul << 30) : (1ul << 27);
 
+   const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
    const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
                                  128 : 16;
 
+   /* The moment we have anything bindless, claim a high per-stage limit */
+   const uint32_t max_per_stage =
+      pdevice->has_a64_buffer_access ? UINT32_MAX :
+                                       MAX_BINDING_TABLE_SIZE - MAX_RTS;
+
    VkSampleCountFlags sample_counts =
       isl_device_get_sample_counts(&pdevice->isl_dev);
 
@@ -1127,15 +1135,15 @@ void anv_GetPhysicalDeviceProperties(
       .maxBoundDescriptorSets                   = MAX_SETS,
       .maxPerStageDescriptorSamplers            = max_samplers,
       .maxPerStageDescriptorUniformBuffers      = 64,
-      .maxPerStageDescriptorStorageBuffers      = 64,
+      .maxPerStageDescriptorStorageBuffers      = max_ssbos,
       .maxPerStageDescriptorSampledImages       = max_samplers,
       .maxPerStageDescriptorStorageImages       = MAX_IMAGES,
       .maxPerStageDescriptorInputAttachments    = 64,
-      .maxPerStageResources                     = MAX_BINDING_TABLE_SIZE - MAX_RTS,
+      .maxPerStageResources                     = max_per_stage,
       .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
       .maxDescriptorSetUniformBuffers           = 6 * 64,           /* number of stages * maxPerStageDescriptorUniformBuffers */
       .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
-      .maxDescriptorSetStorageBuffers           = 6 * 64,           /* number of stages * maxPerStageDescriptorStorageBuffers */
+      .maxDescriptorSetStorageBuffers           = 6 * max_ssbos,    /* number of stages * maxPerStageDescriptorStorageBuffers */
       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
       .maxDescriptorSetSampledImages            = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */
       .maxDescriptorSetStorageImages            = 6 * MAX_IMAGES,   /* number of stages * maxPerStageDescriptorStorageImages */
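Two details here are easy to miss. Hashing has_a64_buffer_access into the pipeline-cache UUID invalidates cached binaries whenever the SSBO lowering strategy changes, since the same SPIR-V now compiles to different code. And while the flag itself is initialized outside this excerpt, the comment in anv_descriptor_data_for_type() above ("gen8 and above when we have softpin enabled") pins down the condition; a sketch of the assumed init:

   /* Assumed physical-device init, not shown in this diff.  A64 SSBO access
    * needs gen8+ (64-bit address messages) plus softpin, so that BO
    * addresses are stable enough to be stored in descriptors: */
   pdevice->has_a64_buffer_access = devinfo->gen >= 8 && pdevice->use_softpin;

With the flag set, max_ssbos jumps from 64 to UINT16_MAX (65535), so the per-set limit becomes 6 * 65535, and maxPerStageResources can claim UINT32_MAX because SSBOs no longer consume binding-table slots.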
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 7abc27be103..356a56e47bb 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -45,6 +45,8 @@ struct apply_pipeline_layout_state {
    /* Place to flag lowered instructions so we don't lower them twice */
    struct set *lowered_instrs;
 
+   int dynamic_offset_uniform_start;
+
    bool uses_constants;
    uint8_t constants_offset;
    struct {
@@ -159,7 +161,12 @@ find_descriptor_for_index_src(nir_src src,
    if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
       return false;
 
-   return true;
+   uint32_t set = nir_intrinsic_desc_set(intrin);
+   uint32_t binding = nir_intrinsic_binding(intrin);
+   uint32_t surface_index = state->set[set].surface_offsets[binding];
+
+   /* Only lower to a BTI message if we have a valid binding table index. */
+   return surface_index < MAX_BINDING_TABLE_SIZE;
 }
 
 static bool
@@ -327,6 +334,7 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
 
    uint32_t set = nir_intrinsic_desc_set(intrin);
    uint32_t binding = nir_intrinsic_binding(intrin);
+   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
 
    const struct anv_descriptor_set_binding_layout *bind_layout =
       &state->layout->set[set].layout->binding[binding];
@@ -339,14 +347,55 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
       array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));
 
    nir_ssa_def *index;
-   if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
+   if (state->pdevice->has_a64_buffer_access &&
+       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
+      /* We store the descriptor offset as 16.8.8 where the top 16 bits are
+       * the offset into the descriptor set, the next 8 are the binding table
+       * index of the descriptor buffer, and the bottom 8 bits are the offset
+       * (in bytes) into the dynamic offset table.
+       */
+      assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
+      uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
+      if (bind_layout->dynamic_offset_index >= 0) {
+         dynamic_offset_index =
+            state->layout->set[set].dynamic_offset_start +
+            bind_layout->dynamic_offset_index;
+      }
+
+      const uint32_t desc_offset =
+         bind_layout->descriptor_offset << 16 |
+         (uint32_t)state->set[set].desc_offset << 8 |
+         dynamic_offset_index;
+
+      if (state->add_bounds_checks) {
+         /* We're using nir_address_format_64bit_bounded_global */
+         assert(intrin->dest.ssa.num_components == 4);
+         assert(intrin->dest.ssa.bit_size == 32);
+         index = nir_vec4(b, nir_imm_int(b, desc_offset),
+                             nir_ssa_for_src(b, intrin->src[0], 1),
+                             nir_imm_int(b, array_size - 1),
+                             nir_ssa_undef(b, 1, 32));
+      } else {
+         /* We're using nir_address_format_64bit_global */
+         assert(intrin->dest.ssa.num_components == 1);
+         assert(intrin->dest.ssa.bit_size == 64);
+         index = nir_pack_64_2x32_split(b, nir_imm_int(b, desc_offset),
+                                           nir_ssa_for_src(b, intrin->src[0], 1));
+      }
+   } else if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
       /* This is an inline uniform block.  Just reference the descriptor set
-       * and use the descriptor offset as the base.
+       * and use the descriptor offset as the base.  Inline uniforms always
+       * use nir_address_format_32bit_index_offset
        */
+      assert(intrin->dest.ssa.num_components == 2);
+      assert(intrin->dest.ssa.bit_size == 32);
       index = nir_imm_ivec2(b, state->set[set].desc_offset,
                                bind_layout->descriptor_offset);
    } else {
       /* We're using nir_address_format_32bit_index_offset */
+      assert(intrin->dest.ssa.num_components == 2);
+      assert(intrin->dest.ssa.bit_size == 32);
       index = nir_vec2(b, nir_iadd_imm(b, array_index, surface_index),
                           nir_imm_int(b, 0));
    }
@@ -364,6 +413,8 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
 
    b->cursor = nir_before_instr(&intrin->instr);
 
+   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+
    /* For us, the resource indices are just indices into the binding table and
    * array elements are sequential.  A resource_reindex just turns into an
    * add of the two indices.
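The 16.8.8 encoding above packs everything the later descriptor fetch needs into a single 32-bit immediate. A standalone, runnable sketch of the pack and unpack (plain C, with hypothetical example values; fields as named in the comment):

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      /* Hypothetical binding: descriptor starts 0x40 bytes into its set, the
       * set's descriptor buffer sits at binding-table index 3, and the
       * binding uses dynamic-offset slot 1 (0xff would mean "none"). */
      uint32_t desc_offset = (0x40u << 16) | (3u << 8) | 1u;

      /* The unpack build_ssbo_descriptor_load() performs below with
       * nir_extract_u16/nir_extract_u8: */
      assert(desc_offset >> 16 == 0x40);         /* offset of binding in set */
      assert(((desc_offset >> 8) & 0xff) == 3);  /* descriptor-buffer BTI */
      assert((desc_offset & 0xff) == 1);         /* dynamic-offset slot */
      return 0;
   }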
@@ -372,15 +423,81 @@
    nir_ssa_def *old_index = intrin->src[0].ssa;
    nir_ssa_def *offset = intrin->src[1].ssa;
 
-   nir_ssa_def *new_index =
-      nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset),
-                  nir_channel(b, old_index, 1));
+   nir_ssa_def *new_index;
+   if (state->pdevice->has_a64_buffer_access &&
+       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
+      if (state->add_bounds_checks) {
+         /* We're using nir_address_format_64bit_bounded_global */
+         assert(intrin->dest.ssa.num_components == 4);
+         assert(intrin->dest.ssa.bit_size == 32);
+         new_index = nir_vec4(b, nir_channel(b, old_index, 0),
+                                 nir_iadd(b, nir_channel(b, old_index, 1),
+                                             offset),
+                                 nir_channel(b, old_index, 2),
+                                 nir_ssa_undef(b, 1, 32));
+      } else {
+         /* We're using nir_address_format_64bit_global */
+         assert(intrin->dest.ssa.num_components == 1);
+         assert(intrin->dest.ssa.bit_size == 64);
+         nir_ssa_def *base = nir_unpack_64_2x32_split_x(b, old_index);
+         nir_ssa_def *arr_idx = nir_unpack_64_2x32_split_y(b, old_index);
+         new_index = nir_pack_64_2x32_split(b, base, nir_iadd(b, arr_idx, offset));
+      }
+   } else {
+      /* We're using nir_address_format_32bit_index_offset */
+      assert(intrin->dest.ssa.num_components == 2);
+      assert(intrin->dest.ssa.bit_size == 32);
+      new_index = nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset),
+                              nir_channel(b, old_index, 1));
+   }
 
    assert(intrin->dest.is_ssa);
    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index));
    nir_instr_remove(&intrin->instr);
 }
 
+static nir_ssa_def *
+build_ssbo_descriptor_load(const VkDescriptorType desc_type,
+                           nir_ssa_def *index,
+                           struct apply_pipeline_layout_state *state)
+{
+   nir_builder *b = &state->builder;
+
+   nir_ssa_def *desc_offset, *array_index;
+   if (state->add_bounds_checks) {
+      /* We're using nir_address_format_64bit_bounded_global */
+      desc_offset = nir_channel(b, index, 0);
+      array_index = nir_umin(b, nir_channel(b, index, 1),
+                                nir_channel(b, index, 2));
+   } else {
+      desc_offset = nir_unpack_64_2x32_split_x(b, index);
+      array_index = nir_unpack_64_2x32_split_y(b, index);
+   }
+
+   /* The desc_offset is actually 16.8.8 */
+   nir_ssa_def *desc_buffer_index =
+      nir_extract_u8(b, desc_offset, nir_imm_int(b, 1));
+   nir_ssa_def *desc_offset_base =
+      nir_extract_u16(b, desc_offset, nir_imm_int(b, 1));
+
+   /* Compute the actual descriptor offset */
+   const unsigned descriptor_size =
+      anv_descriptor_type_size(state->pdevice, desc_type);
+   desc_offset = nir_iadd(b, desc_offset_base,
+                             nir_imul_imm(b, array_index, descriptor_size));
+
+   nir_intrinsic_instr *desc_load =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
+   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
+   desc_load->src[1] = nir_src_for_ssa(desc_offset);
+   desc_load->num_components = 4;
+   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL);
+   nir_builder_instr_insert(b, &desc_load->instr);
+
+   return &desc_load->dest.ssa;
+}
+
 static void
 lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
                              struct apply_pipeline_layout_state *state)
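Conceptually, build_ssbo_descriptor_load() turns the 16.8.8 handle into a 16-byte read from the descriptor buffer, which it reaches as an ordinary UBO through the set's binding-table entry. A C-like model of what the emitted NIR computes (illustrative only; load_ubo4 stands in for the nir_intrinsic_load_ubo of 4 dwords):

   #include <stdint.h>

   extern void load_ubo4(uint32_t block, uint32_t offset, uint32_t out[4]);

   static void ssbo_descriptor_load(uint32_t desc_offset, uint32_t array_index,
                                    uint32_t descriptor_size, uint32_t desc[4])
   {
      uint32_t bti  = (desc_offset >> 8) & 0xff;  /* descriptor-buffer BTI */
      uint32_t base = desc_offset >> 16;          /* binding's offset in set */
      load_ubo4(bti, base + array_index * descriptor_size, desc);
      /* desc[0] = address bits 31:0,  desc[1] = address bits 63:32,
       * desc[2] = range,              desc[3] = zero (NIR offset component) */
   }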
@@ -389,12 +506,84 @@ lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
 
    b->cursor = nir_before_instr(&intrin->instr);
 
-   /* We follow the nir_address_format_32bit_index_offset model */
+   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+
    assert(intrin->src[0].is_ssa);
    nir_ssa_def *index = intrin->src[0].ssa;
 
+   nir_ssa_def *desc;
+   if (state->pdevice->has_a64_buffer_access &&
+       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
+      desc = build_ssbo_descriptor_load(desc_type, index, state);
+
+      /* We want nir_address_format_64bit_global */
+      if (!state->add_bounds_checks)
+         desc = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
+
+      if (state->dynamic_offset_uniform_start >= 0) {
+         /* This shader has dynamic offsets and we have no way of knowing
+          * (save from the dynamic offset base index) if this buffer has a
+          * dynamic offset.
+          */
+         nir_ssa_def *desc_offset, *array_index;
+         if (state->add_bounds_checks) {
+            /* We're using nir_address_format_64bit_bounded_global */
+            desc_offset = nir_channel(b, index, 0);
+            array_index = nir_umin(b, nir_channel(b, index, 1),
+                                      nir_channel(b, index, 2));
+         } else {
+            desc_offset = nir_unpack_64_2x32_split_x(b, index);
+            array_index = nir_unpack_64_2x32_split_y(b, index);
+         }
+
+         nir_ssa_def *dyn_offset_base =
+            nir_extract_u8(b, desc_offset, nir_imm_int(b, 0));
+         nir_ssa_def *dyn_offset_idx =
+            nir_iadd(b, dyn_offset_base, array_index);
+         if (state->add_bounds_checks) {
+            dyn_offset_idx = nir_umin(b, dyn_offset_idx,
+                                         nir_imm_int(b, MAX_DYNAMIC_BUFFERS));
+         }
+
+         nir_intrinsic_instr *dyn_load =
+            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+         nir_intrinsic_set_base(dyn_load, state->dynamic_offset_uniform_start);
+         nir_intrinsic_set_range(dyn_load, MAX_DYNAMIC_BUFFERS * 4);
+         dyn_load->src[0] = nir_src_for_ssa(nir_imul_imm(b, dyn_offset_idx, 4));
+         dyn_load->num_components = 1;
+         nir_ssa_dest_init(&dyn_load->instr, &dyn_load->dest, 1, 32, NULL);
+         nir_builder_instr_insert(b, &dyn_load->instr);
+
+         nir_ssa_def *dynamic_offset =
+            nir_bcsel(b, nir_ieq(b, dyn_offset_base, nir_imm_int(b, 0xff)),
+                         nir_imm_int(b, 0), &dyn_load->dest.ssa);
+
+         if (state->add_bounds_checks) {
+            /* The dynamic offset gets added to the base pointer so that we
+             * have a sliding window range.
+             *
+             * We're using nir_address_format_64bit_bounded_global.
+             */
+            nir_ssa_def *base_ptr =
+               nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
+            base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
+            desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
+                               nir_unpack_64_2x32_split_y(b, base_ptr),
+                               nir_channel(b, desc, 2),
+                               nir_channel(b, desc, 3));
+         } else {
+            /* We're using nir_address_format_64bit_global */
+            desc = nir_iadd(b, desc, nir_u2u64(b, dynamic_offset));
+         }
+      }
+   } else {
+      /* We follow the nir_address_format_32bit_index_offset model */
+      desc = index;
+   }
+
    assert(intrin->dest.is_ssa);
-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index));
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
    nir_instr_remove(&intrin->instr);
 }
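The net effect of the dynamic-offset branch, stripped of the NIR-builder plumbing, is small. A C-like model (illustrative; dynamic_offsets is the uniform array registered at the end of this file's changes, and the index is additionally clamped in the robust case):

   #include <stdint.h>

   static uint64_t apply_dynamic_offset(uint64_t addr, uint32_t desc_offset,
                                        uint32_t array_index,
                                        const uint32_t *dynamic_offsets)
   {
      uint32_t dyn_base = desc_offset & 0xff;      /* 0xff means "none" */
      uint32_t dyn_off = dyn_base == 0xff ?
                         0 : dynamic_offsets[dyn_base + array_index];
      return addr + dyn_off;  /* slide the base; the range stays the same */
   }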
@@ -409,15 +598,24 @@ lower_get_buffer_size(nir_intrinsic_instr *intrin,
 
    b->cursor = nir_before_instr(&intrin->instr);
 
+   const VkDescriptorType desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+
    assert(intrin->src[0].is_ssa);
    nir_ssa_def *index = intrin->src[0].ssa;
 
-   /* We're following the nir_address_format_32bit_index_offset model so the
-    * binding table index is the first component of the address.  The
-    * back-end wants a scalar binding table index source.
-    */
-   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
-                         nir_src_for_ssa(nir_channel(b, index, 0)));
+   if (state->pdevice->has_a64_buffer_access) {
+      nir_ssa_def *desc = build_ssbo_descriptor_load(desc_type, index, state);
+      nir_ssa_def *size = nir_channel(b, desc, 2);
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(size));
+      nir_instr_remove(&intrin->instr);
+   } else {
+      /* We're following the nir_address_format_32bit_index_offset model so
+       * the binding table index is the first component of the address.  The
+       * back-end wants a scalar binding table index source.
+       */
+      nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+                            nir_src_for_ssa(nir_channel(b, index, 0)));
+   }
 }
 
 static nir_ssa_def *
@@ -724,6 +922,7 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
       .layout = layout,
       .add_bounds_checks = robust_buffer_access,
       .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
+      .dynamic_offset_uniform_start = -1,
    };
 
    for (unsigned s = 0; s < layout->num_sets; s++) {
@@ -813,11 +1012,16 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
    qsort(infos, used_binding_count, sizeof(struct binding_info),
          compare_binding_infos);
 
+   bool have_dynamic_buffers = false;
+
    for (unsigned i = 0; i < used_binding_count; i++) {
       unsigned set = infos[i].set, b = infos[i].binding;
       struct anv_descriptor_set_binding_layout *binding =
             &layout->set[set].layout->binding[b];
 
+      if (binding->dynamic_offset_index >= 0)
+         have_dynamic_buffers = true;
+
       const uint32_t array_size = binding->array_size;
 
       if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
@@ -874,6 +1078,16 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
       }
    }
 
+   if (have_dynamic_buffers) {
+      state.dynamic_offset_uniform_start = shader->num_uniforms;
+      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
+                                                       MAX_DYNAMIC_BUFFERS);
+      for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
+         param[i] = ANV_PARAM_DYN_OFFSET(i);
+      shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 4;
+      assert(shader->num_uniforms == prog_data->nr_params * 4);
+   }
+
    nir_foreach_variable(var, &shader->uniforms) {
       const struct glsl_type *glsl_type = glsl_without_array(var->type);
 
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 2dd60f2dd2c..b0ed2187376 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -166,12 +166,20 @@ anv_shader_compile_to_nir(struct anv_device *device,
          .variable_pointers = true,
       },
       .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
-      .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
       .phys_ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT64, 1),
       .push_const_ptr_type = glsl_uint_type(),
       .shared_ptr_type = glsl_uint_type(),
    };
 
+   if (pdevice->has_a64_buffer_access) {
+      if (device->robust_buffer_access)
+         spirv_options.ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 4);
+      else
+         spirv_options.ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT64, 1);
+   } else {
+      spirv_options.ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2);
+   }
+
    nir_function *entry_point =
      spirv_to_nir(spirv, module->size / 4,
                   spec_entries, num_spec_entries,
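The SPIR-V-level pointer types chosen here line up one-to-one with the NIR address formats selected in anv_pipeline_lower_nir() below; a condensed summary of the mapping (sketch, drawn from both hunks):

   /* SSBO pointer representation by device capability (from this patch):
    *
    *   A64 + robustBufferAccess : uvec4  -> nir_address_format_64bit_bounded_global
    *   A64, no robustness       : uint64 -> nir_address_format_64bit_global
    *   no A64 (binding table)   : uvec2  -> nir_address_format_32bit_index_offset
    *
    * The bounded-global vec4 is (addr_lo, addr_hi, range, offset), which is
    * exactly the anv_address_range_descriptor layout plus a running offset. */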
@@ -553,8 +561,9 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                        struct anv_pipeline_stage *stage,
                        struct anv_pipeline_layout *layout)
 {
-   const struct brw_compiler *compiler =
-      pipeline->device->instance->physicalDevice.compiler;
+   const struct anv_physical_device *pdevice =
+      &pipeline->device->instance->physicalDevice;
+   const struct brw_compiler *compiler = pdevice->compiler;
    struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
    nir_shader *nir = stage->nir;
 
@@ -607,15 +616,26 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
 
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
    if (layout) {
-      anv_nir_apply_pipeline_layout(&pipeline->device->instance->physicalDevice,
+      anv_nir_apply_pipeline_layout(pdevice,
                                     pipeline->device->robust_buffer_access,
                                     layout, nir, prog_data,
                                     &stage->bind_map);
 
-      NIR_PASS_V(nir, nir_lower_explicit_io,
-                 nir_var_mem_ubo | nir_var_mem_ssbo,
+      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
                  nir_address_format_32bit_index_offset);
+
+      nir_address_format ssbo_address_format;
+      if (pdevice->has_a64_buffer_access) {
+         if (pipeline->device->robust_buffer_access)
+            ssbo_address_format = nir_address_format_64bit_bounded_global;
+         else
+            ssbo_address_format = nir_address_format_64bit_global;
+      } else {
+         ssbo_address_format = nir_address_format_32bit_index_offset;
+      }
+      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
+                 ssbo_address_format);
+
+      NIR_PASS_V(nir, nir_opt_constant_folding);
    }
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index ac63ab8b3be..9c747fa019c 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1521,6 +1521,19 @@ struct anv_vue_header {
    float PointWidth;
 };
 
+/** Struct representing a address/range descriptor
+ *
+ * The fields of this struct correspond directly to the data layout of
+ * nir_address_format_64bit_bounded_global addresses.  The last field is the
+ * offset in the NIR address so it must be zero so that when you load the
+ * descriptor you get a pointer to the start of the range.
+ */
+struct anv_address_range_descriptor {
+   uint64_t address;
+   uint32_t range;
+   uint32_t zero;
+};
+
 enum anv_descriptor_data {
    /** The descriptor contains a BTI reference to a surface state */
    ANV_DESCRIPTOR_SURFACE_STATE   = (1 << 0),
@@ -1532,6 +1545,8 @@ enum anv_descriptor_data {
    ANV_DESCRIPTOR_IMAGE_PARAM     = (1 << 3),
    /** The descriptor contains auxiliary image layout data */
    ANV_DESCRIPTOR_INLINE_UNIFORM  = (1 << 4),
+   /** anv_address_range_descriptor with a buffer address and range */
+   ANV_DESCRIPTOR_ADDRESS_RANGE   = (1 << 5),
 };
 
 struct anv_descriptor_set_binding_layout {
@@ -2086,8 +2101,13 @@ struct anv_xfb_binding {
 };
 
 #define ANV_PARAM_PUSH(offset)         ((1 << 16) | (uint32_t)(offset))
+#define ANV_PARAM_IS_PUSH(param)       ((uint32_t)(param) >> 16 == 1)
 #define ANV_PARAM_PUSH_OFFSET(param)   ((param) & 0xffff)
 
+#define ANV_PARAM_DYN_OFFSET(offset)      ((2 << 16) | (uint32_t)(offset))
+#define ANV_PARAM_IS_DYN_OFFSET(param)    ((uint32_t)(param) >> 16 == 2)
+#define ANV_PARAM_DYN_OFFSET_IDX(param)   ((param) & 0xffff)
+
 struct anv_push_constants {
    /* Current allocated size of this push constants data structure.
    * Because a decent chunk of it may not be used (images on SKL, for