diff options
-rw-r--r-- | src/intel/vulkan/anv_descriptor_set.c | 89 | ||||
-rw-r--r-- | src/intel/vulkan/anv_device.c | 33 | ||||
-rw-r--r-- | src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 75 | ||||
-rw-r--r-- | src/intel/vulkan/anv_private.h | 32 | ||||
-rw-r--r-- | src/intel/vulkan/genX_cmd_buffer.c | 12 | ||||
-rw-r--r-- | src/intel/vulkan/genX_state.c | 18 |
6 files changed, 228 insertions, 31 deletions
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index c7a90127511..6db6021822a 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -45,15 +45,24 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device, switch (type) { case VK_DESCRIPTOR_TYPE_SAMPLER: data = ANV_DESCRIPTOR_SAMPLER_STATE; + if (device->has_bindless_samplers) + data |= ANV_DESCRIPTOR_SAMPLED_IMAGE; break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: data = ANV_DESCRIPTOR_SURFACE_STATE | ANV_DESCRIPTOR_SAMPLER_STATE; + if (device->has_bindless_images || device->has_bindless_samplers) + data |= ANV_DESCRIPTOR_SAMPLED_IMAGE; break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + data = ANV_DESCRIPTOR_SURFACE_STATE; + if (device->has_bindless_images) + data |= ANV_DESCRIPTOR_SAMPLED_IMAGE; + break; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: data = ANV_DESCRIPTOR_SURFACE_STATE; break; @@ -100,6 +109,9 @@ anv_descriptor_data_size(enum anv_descriptor_data data) { unsigned size = 0; + if (data & ANV_DESCRIPTOR_SAMPLED_IMAGE) + size += sizeof(struct anv_sampled_image_descriptor); + if (data & ANV_DESCRIPTOR_IMAGE_PARAM) size += BRW_IMAGE_PARAM_SIZE * 4; @@ -118,7 +130,17 @@ anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout) return layout->array_size; } - return anv_descriptor_data_size(layout->data); + unsigned size = anv_descriptor_data_size(layout->data); + + /* For multi-planar bindings, we make every descriptor consume the maximum + * number of planes so we don't have to bother with walking arrays and + * adding things up every time. Fortunately, YCbCr samplers aren't all + * that common and likely won't be in the middle of big arrays. 
+ */ + if (layout->max_plane_count > 1) + size *= layout->max_plane_count; + + return size; } /** Returns the size in bytes of each descriptor of the given type @@ -132,7 +154,11 @@ unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice, VkDescriptorType type) { - assert(type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT); + assert(type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT && + type != VK_DESCRIPTOR_TYPE_SAMPLER && + type != VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE && + type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + return anv_descriptor_data_size(anv_descriptor_data_for_type(pdevice, type)); } @@ -146,6 +172,12 @@ anv_descriptor_data_supports_bindless(const struct anv_physical_device *pdevice, return true; } + if (data & ANV_DESCRIPTOR_SAMPLED_IMAGE) { + assert(pdevice->has_bindless_images || pdevice->has_bindless_samplers); + return sampler ? pdevice->has_bindless_samplers : + pdevice->has_bindless_images; + } + return false; } @@ -586,6 +618,13 @@ VkResult anv_CreateDescriptorPool( unsigned desc_data_size = anv_descriptor_data_size(desc_data) * pCreateInfo->pPoolSizes[i].descriptorCount; + /* Combined image sampler descriptors can take up to 3 slots if they + * hold a YCbCr image. + */ + if (pCreateInfo->pPoolSizes[i].type == + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + desc_data_size *= 3; + if (pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { /* Inline uniform blocks are specified to use the descriptor array @@ -999,6 +1038,18 @@ anv_descriptor_set_write_image_param(uint32_t *param_desc_map, #undef WRITE_PARAM_FIELD } +static uint32_t +anv_surface_state_to_handle(struct anv_state state) +{ + /* Bits 31:12 of the bindless surface offset in the extended message + * descriptor is bits 25:6 of the byte-based address. 
+ */ + assert(state.offset >= 0); + uint32_t offset = state.offset; + assert((offset & 0x3f) == 0 && offset < (1 << 26)); + return offset << 6; +} + void anv_descriptor_set_write_image_view(struct anv_device *device, struct anv_descriptor_set *set, @@ -1057,6 +1108,33 @@ anv_descriptor_set_write_image_view(struct anv_device *device, void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset + element * anv_descriptor_size(bind_layout); + if (bind_layout->data & ANV_DESCRIPTOR_SAMPLED_IMAGE) { + struct anv_sampled_image_descriptor desc_data[3]; + memset(desc_data, 0, sizeof(desc_data)); + + if (image_view) { + for (unsigned p = 0; p < image_view->n_planes; p++) { + struct anv_surface_state sstate = + (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ? + image_view->planes[p].general_sampler_surface_state : + image_view->planes[p].optimal_sampler_surface_state; + desc_data[p].image = anv_surface_state_to_handle(sstate.state); + } + } + + if (sampler) { + for (unsigned p = 0; p < sampler->n_planes; p++) + desc_data[p].sampler = sampler->bindless_state.offset + p * 32; + } + + /* We may have max_plane_count == 0 if this isn't a sampled image but it + * can be no more than the size of our array of handles. 
+ */ + assert(bind_layout->max_plane_count <= ARRAY_SIZE(desc_data)); + memcpy(desc_map, desc_data, + MAX2(1, bind_layout->max_plane_count) * sizeof(desc_data[0])); + } + if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) { /* Storage images can only ever have one plane */ assert(image_view->n_planes == 1); @@ -1090,6 +1168,13 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device, void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset + element * anv_descriptor_size(bind_layout); + if (bind_layout->data & ANV_DESCRIPTOR_SAMPLED_IMAGE) { + struct anv_sampled_image_descriptor desc_data = { + .image = anv_surface_state_to_handle(buffer_view->surface_state), + }; + memcpy(desc_map, &desc_data, sizeof(desc_data)); + } + if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) { anv_descriptor_set_write_image_param(desc_map, &buffer_view->storage_image_param); diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index d56e3593353..44fea839f52 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -280,6 +280,10 @@ anv_physical_device_init_uuids(struct anv_physical_device *device) sizeof(device->always_use_bindless)); _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access, sizeof(device->has_a64_buffer_access)); + _mesa_sha1_update(&sha1_ctx, &device->has_bindless_images, + sizeof(device->has_bindless_images)); + _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers, + sizeof(device->has_bindless_samplers)); _mesa_sha1_final(&sha1_ctx, sha1); memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE); @@ -464,6 +468,19 @@ anv_physical_device_init(struct anv_physical_device *device, device->has_a64_buffer_access = device->info.gen >= 8 && device->use_softpin; + /* We first get bindless image access on Skylake and we can only really do + * it if we don't have any relocations so we need softpin. 
+ */ + device->has_bindless_images = device->info.gen >= 9 && + device->use_softpin; + + /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms) + * because it's just a matter of setting the sampler address in the sample + * message header. However, we've not bothered to wire it up for vec4 so + * we leave it disabled on gen7. + */ + device->has_bindless_samplers = device->info.gen >= 8; + /* Starting with Gen10, the timestamp frequency of the command streamer may * vary from one part to another. We can query the value from the kernel. */ @@ -1114,8 +1131,11 @@ void anv_GetPhysicalDeviceProperties( (1ul << 30) : (1ul << 27); const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64; - const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ? - 128 : 16; + const uint32_t max_textures = + pdevice->has_bindless_images ? UINT16_MAX : 128; + const uint32_t max_samplers = + pdevice->has_bindless_samplers ? UINT16_MAX : + (devinfo->gen >= 8 || devinfo->is_haswell) ? 
128 : 16; /* The moment we have anything bindless, claim a high per-stage limit */ const uint32_t max_per_stage = @@ -1144,7 +1164,7 @@ void anv_GetPhysicalDeviceProperties( .maxPerStageDescriptorSamplers = max_samplers, .maxPerStageDescriptorUniformBuffers = 64, .maxPerStageDescriptorStorageBuffers = max_ssbos, - .maxPerStageDescriptorSampledImages = max_samplers, + .maxPerStageDescriptorSampledImages = max_textures, .maxPerStageDescriptorStorageImages = MAX_IMAGES, .maxPerStageDescriptorInputAttachments = 64, .maxPerStageResources = max_per_stage, @@ -1153,7 +1173,7 @@ void anv_GetPhysicalDeviceProperties( .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */ .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, - .maxDescriptorSetSampledImages = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */ + .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */ .maxDescriptorSetStorageImages = 6 * MAX_IMAGES, /* number of stages * maxPerStageDescriptorStorageImages */ .maxDescriptorSetInputAttachments = 256, .maxVertexInputAttributes = MAX_VBS, @@ -3408,6 +3428,11 @@ void anv_DestroySampler( if (!sampler) return; + if (sampler->bindless_state.map) { + anv_state_pool_free(&device->dynamic_state_pool, + sampler->bindless_state); + } + vk_free2(&device->alloc, pAllocator, sampler); } diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index ab0103cfcd4..800ed2ef3e2 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -760,39 +760,64 @@ lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type, unsigned array_size = state->layout->set[set].layout->binding[binding].array_size; - nir_tex_src_type offset_src_type; + unsigned 
binding_offset; if (deref_src_type == nir_tex_src_texture_deref) { - offset_src_type = nir_tex_src_texture_offset; - *base_index = state->set[set].surface_offsets[binding] + plane; + binding_offset = state->set[set].surface_offsets[binding]; } else { assert(deref_src_type == nir_tex_src_sampler_deref); - offset_src_type = nir_tex_src_sampler_offset; - *base_index = state->set[set].sampler_offsets[binding] + plane; + binding_offset = state->set[set].sampler_offsets[binding]; } + nir_builder *b = &state->builder; + + nir_tex_src_type offset_src_type; nir_ssa_def *index = NULL; - if (deref->deref_type != nir_deref_type_var) { - assert(deref->deref_type == nir_deref_type_array); + if (binding_offset > MAX_BINDING_TABLE_SIZE) { + const unsigned plane_offset = + plane * sizeof(struct anv_sampled_image_descriptor); - if (nir_src_is_const(deref->arr.index)) { - unsigned arr_index = nir_src_as_uint(deref->arr.index); - *base_index += MIN2(arr_index, array_size - 1); + nir_ssa_def *desc = + build_descriptor_load(deref, plane_offset, 2, 32, state); + + if (deref_src_type == nir_tex_src_texture_deref) { + offset_src_type = nir_tex_src_texture_handle; + index = nir_channel(b, desc, 0); } else { - nir_builder *b = &state->builder; - - /* From VK_KHR_sampler_ycbcr_conversion: - * - * If sampler Y’CBCR conversion is enabled, the combined image - * sampler must be indexed only by constant integral expressions when - * aggregated into arrays in shader code, irrespective of the - * shaderSampledImageArrayDynamicIndexing feature. 
- */ - assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1); + assert(deref_src_type == nir_tex_src_sampler_deref); + offset_src_type = nir_tex_src_sampler_handle; + index = nir_channel(b, desc, 1); + } + } else { + if (deref_src_type == nir_tex_src_texture_deref) { + offset_src_type = nir_tex_src_texture_offset; + } else { + assert(deref_src_type == nir_tex_src_sampler_deref); + offset_src_type = nir_tex_src_sampler_offset; + } - index = nir_ssa_for_src(b, deref->arr.index, 1); + *base_index = binding_offset + plane; - if (state->add_bounds_checks) - index = nir_umin(b, index, nir_imm_int(b, array_size - 1)); + if (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + + if (nir_src_is_const(deref->arr.index)) { + unsigned arr_index = nir_src_as_uint(deref->arr.index); + *base_index += MIN2(arr_index, array_size - 1); + } else { + /* From VK_KHR_sampler_ycbcr_conversion: + * + * If sampler Y’CBCR conversion is enabled, the combined image + * sampler must be indexed only by constant integral expressions + * when aggregated into arrays in shader code, irrespective of + * the shaderSampledImageArrayDynamicIndexing feature. + */ + assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1); + + index = nir_ssa_for_src(b, deref->arr.index, 1); + + if (state->add_bounds_checks) + index = nir_umin(b, index, nir_imm_int(b, array_size - 1)); + } } } @@ -1062,6 +1087,10 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice, anv_descriptor_requires_bindless(pdevice, binding, true)) { /* If this descriptor doesn't fit in the binding table or if it * requires bindless for some reason, flag it as bindless. + * + * We also make large sampler arrays bindless because we can avoid + * using indirect sends thanks to bindless samplers being packed + * less tightly than the sampler table. 
*/ assert(anv_descriptor_supports_bindless(pdevice, binding, true)); state.set[set].sampler_offsets[b] = BINDLESS_OFFSET; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index bb24ff1ae82..9f525d1e21a 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -953,6 +953,10 @@ struct anv_physical_device { /** True if we can access buffers using A64 messages */ bool has_a64_buffer_access; + /** True if we can use bindless access for images */ + bool has_bindless_images; + /** True if we can use bindless access for samplers */ + bool has_bindless_samplers; struct anv_device_extension_table supported_extensions; @@ -1521,6 +1525,27 @@ struct anv_vue_header { float PointWidth; }; +/** Struct representing a sampled image descriptor + * + * This descriptor layout is used for sampled images, bare sampler, and + * combined image/sampler descriptors. + */ +struct anv_sampled_image_descriptor { + /** Bindless image handle + * + * This is expected to already be shifted such that the 20-bit + * SURFACE_STATE table index is in the top 20 bits. + */ + uint32_t image; + + /** Bindless sampler handle + * + * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative + * to the dynamic state base address. 
+ */ + uint32_t sampler; +}; + /** Struct representing a address/range descriptor * * The fields of this struct correspond directly to the data layout of @@ -1547,6 +1572,8 @@ enum anv_descriptor_data { ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4), /** anv_address_range_descriptor with a buffer address and range */ ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5), + /** Bindless surface handle */ + ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6), }; struct anv_descriptor_set_binding_layout { @@ -3454,6 +3481,11 @@ struct anv_sampler { uint32_t state[3][4]; uint32_t n_planes; struct anv_ycbcr_conversion *conversion; + + /* Blob of sampler state data which is guaranteed to be 32-byte aligned + * and with a 32-byte stride for use as bindless samplers. + */ + struct anv_state bindless_state; }; struct anv_framebuffer { diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 3189585cbd3..1af36bced24 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -110,10 +110,18 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.InstructionBuffersizeModifyEnable = true; # endif # if (GEN_GEN >= 9) - sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { NULL, 0 }; + if (cmd_buffer->device->instance->physicalDevice.use_softpin) { + sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { + .bo = device->surface_state_pool.block_pool.bo, + .offset = 0, + }; + sba.BindlessSurfaceStateSize = (1 << 20) - 1; + } else { + sba.BindlessSurfaceStateBaseAddress = ANV_NULL_ADDRESS; + sba.BindlessSurfaceStateSize = 0; + } sba.BindlessSurfaceStateMOCS = GENX(MOCS); sba.BindlessSurfaceStateBaseAddressModifyEnable = true; - sba.BindlessSurfaceStateSize = 0; # endif # if (GEN_GEN >= 10) sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 }; diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 283cd8c501a..9276dc9470b 100644 --- a/src/intel/vulkan/genX_state.c 
+++ b/src/intel/vulkan/genX_state.c @@ -328,6 +328,8 @@ VkResult genX(CreateSampler)( VkSampler* pSampler) { ANV_FROM_HANDLE(anv_device, device, _device); + const struct anv_physical_device *pdevice = + &device->instance->physicalDevice; struct anv_sampler *sampler; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); @@ -383,6 +385,17 @@ VkResult genX(CreateSampler)( } } + if (pdevice->has_bindless_samplers) { + /* If we have bindless, allocate enough samplers. We allocate 32 bytes + * for each sampler instead of 16 bytes because we want all bindless + * samplers to be 32-byte aligned so we don't have to use indirect + * sampler messages on them. + */ + sampler->bindless_state = + anv_state_pool_alloc(&device->dynamic_state_pool, + sampler->n_planes * 32, 32); + } + for (unsigned p = 0; p < sampler->n_planes; p++) { const bool plane_has_chroma = sampler->conversion && sampler->conversion->format->planes[p].has_chroma; @@ -452,6 +465,11 @@ VkResult genX(CreateSampler)( }; GENX(SAMPLER_STATE_pack)(NULL, sampler->state[p], &sampler_state); + + if (sampler->bindless_state.map) { + memcpy(sampler->bindless_state.map + p * 32, + sampler->state[p], GENX(SAMPLER_STATE_length) * 4); + } } *pSampler = anv_sampler_to_handle(sampler); |