summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/intel/vulkan/anv_descriptor_set.c 89
-rw-r--r-- src/intel/vulkan/anv_device.c 33
-rw-r--r-- src/intel/vulkan/anv_nir_apply_pipeline_layout.c 75
-rw-r--r-- src/intel/vulkan/anv_private.h 32
-rw-r--r-- src/intel/vulkan/genX_cmd_buffer.c 12
-rw-r--r-- src/intel/vulkan/genX_state.c 18
6 files changed, 228 insertions, 31 deletions
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index c7a90127511..6db6021822a 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -45,15 +45,24 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
switch (type) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
data = ANV_DESCRIPTOR_SAMPLER_STATE;
+ if (device->has_bindless_samplers)
+ data |= ANV_DESCRIPTOR_SAMPLED_IMAGE;
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
data = ANV_DESCRIPTOR_SURFACE_STATE |
ANV_DESCRIPTOR_SAMPLER_STATE;
+ if (device->has_bindless_images || device->has_bindless_samplers)
+ data |= ANV_DESCRIPTOR_SAMPLED_IMAGE;
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ data = ANV_DESCRIPTOR_SURFACE_STATE;
+ if (device->has_bindless_images)
+ data |= ANV_DESCRIPTOR_SAMPLED_IMAGE;
+ break;
+
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
data = ANV_DESCRIPTOR_SURFACE_STATE;
break;
@@ -100,6 +109,9 @@ anv_descriptor_data_size(enum anv_descriptor_data data)
{
unsigned size = 0;
+ if (data & ANV_DESCRIPTOR_SAMPLED_IMAGE)
+ size += sizeof(struct anv_sampled_image_descriptor);
+
if (data & ANV_DESCRIPTOR_IMAGE_PARAM)
size += BRW_IMAGE_PARAM_SIZE * 4;
@@ -118,7 +130,17 @@ anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout)
return layout->array_size;
}
- return anv_descriptor_data_size(layout->data);
+ unsigned size = anv_descriptor_data_size(layout->data);
+
+ /* For multi-planar bindings, we make every descriptor consume the maximum
+ * number of planes so we don't have to bother with walking arrays and
+ * adding things up every time. Fortunately, YCbCr samplers aren't all
+ * that common and likely won't be in the middle of big arrays.
+ */
+ if (layout->max_plane_count > 1)
+ size *= layout->max_plane_count;
+
+ return size;
}
/** Returns the size in bytes of each descriptor of the given type
@@ -132,7 +154,11 @@ unsigned
anv_descriptor_type_size(const struct anv_physical_device *pdevice,
VkDescriptorType type)
{
- assert(type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
+ assert(type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT &&
+ type != VK_DESCRIPTOR_TYPE_SAMPLER &&
+ type != VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE &&
+ type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
+
return anv_descriptor_data_size(anv_descriptor_data_for_type(pdevice, type));
}
@@ -146,6 +172,12 @@ anv_descriptor_data_supports_bindless(const struct anv_physical_device *pdevice,
return true;
}
+ if (data & ANV_DESCRIPTOR_SAMPLED_IMAGE) {
+ assert(pdevice->has_bindless_images || pdevice->has_bindless_samplers);
+ return sampler ? pdevice->has_bindless_samplers :
+ pdevice->has_bindless_images;
+ }
+
return false;
}
@@ -586,6 +618,13 @@ VkResult anv_CreateDescriptorPool(
unsigned desc_data_size = anv_descriptor_data_size(desc_data) *
pCreateInfo->pPoolSizes[i].descriptorCount;
+ /* Combined image sampler descriptors can take up to 3 slots if they
+ * hold a YCbCr image.
+ */
+ if (pCreateInfo->pPoolSizes[i].type ==
+ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ desc_data_size *= 3;
+
if (pCreateInfo->pPoolSizes[i].type ==
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
/* Inline uniform blocks are specified to use the descriptor array
@@ -999,6 +1038,18 @@ anv_descriptor_set_write_image_param(uint32_t *param_desc_map,
#undef WRITE_PARAM_FIELD
}
+static uint32_t
+anv_surface_state_to_handle(struct anv_state state)
+{
+   /* The extended message descriptor carries bits 25:6 of the surface
+    * state's byte offset in its bits 31:12, hence the left shift by 6.
+    */
+   assert(state.offset >= 0);
+   const uint32_t byte_offset = (uint32_t)state.offset;
+   assert(byte_offset < (1 << 26) && (byte_offset & 0x3f) == 0);
+   return byte_offset << 6;
+}
+
void
anv_descriptor_set_write_image_view(struct anv_device *device,
struct anv_descriptor_set *set,
@@ -1057,6 +1108,33 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
element * anv_descriptor_size(bind_layout);
+ if (bind_layout->data & ANV_DESCRIPTOR_SAMPLED_IMAGE) {
+ struct anv_sampled_image_descriptor desc_data[3];
+ memset(desc_data, 0, sizeof(desc_data));
+
+ if (image_view) {
+ for (unsigned p = 0; p < image_view->n_planes; p++) {
+ struct anv_surface_state sstate =
+ (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
+ image_view->planes[p].general_sampler_surface_state :
+ image_view->planes[p].optimal_sampler_surface_state;
+ desc_data[p].image = anv_surface_state_to_handle(sstate.state);
+ }
+ }
+
+ if (sampler) {
+ for (unsigned p = 0; p < sampler->n_planes; p++)
+ desc_data[p].sampler = sampler->bindless_state.offset + p * 32;
+ }
+
+ /* We may have max_plane_count == 0 if this isn't a sampled image but it
+ * can be no more than the size of our array of handles.
+ */
+ assert(bind_layout->max_plane_count <= ARRAY_SIZE(desc_data));
+ memcpy(desc_map, desc_data,
+ MAX2(1, bind_layout->max_plane_count) * sizeof(desc_data[0]));
+ }
+
if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
/* Storage images can only ever have one plane */
assert(image_view->n_planes == 1);
@@ -1090,6 +1168,13 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device,
void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
element * anv_descriptor_size(bind_layout);
+ if (bind_layout->data & ANV_DESCRIPTOR_SAMPLED_IMAGE) {
+ struct anv_sampled_image_descriptor desc_data = {
+ .image = anv_surface_state_to_handle(buffer_view->surface_state),
+ };
+ memcpy(desc_map, &desc_data, sizeof(desc_data));
+ }
+
if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
anv_descriptor_set_write_image_param(desc_map,
&buffer_view->storage_image_param);
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index d56e3593353..44fea839f52 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -280,6 +280,10 @@ anv_physical_device_init_uuids(struct anv_physical_device *device)
sizeof(device->always_use_bindless));
_mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
sizeof(device->has_a64_buffer_access));
+ _mesa_sha1_update(&sha1_ctx, &device->has_bindless_images,
+ sizeof(device->has_bindless_images));
+ _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
+ sizeof(device->has_bindless_samplers));
_mesa_sha1_final(&sha1_ctx, sha1);
memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
@@ -464,6 +468,19 @@ anv_physical_device_init(struct anv_physical_device *device,
device->has_a64_buffer_access = device->info.gen >= 8 &&
device->use_softpin;
+ /* We first get bindless image access on Skylake and we can only really do
+ * it if we don't have any relocations so we need softpin.
+ */
+ device->has_bindless_images = device->info.gen >= 9 &&
+ device->use_softpin;
+
+ /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
+ * because it's just a matter of setting the sampler address in the sample
+ * message header. However, we've not bothered to wire it up for vec4 so
+ * we leave it disabled on gen7.
+ */
+ device->has_bindless_samplers = device->info.gen >= 8;
+
/* Starting with Gen10, the timestamp frequency of the command streamer may
* vary from one part to another. We can query the value from the kernel.
*/
@@ -1114,8 +1131,11 @@ void anv_GetPhysicalDeviceProperties(
(1ul << 30) : (1ul << 27);
const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
- const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
- 128 : 16;
+ const uint32_t max_textures =
+ pdevice->has_bindless_images ? UINT16_MAX : 128;
+ const uint32_t max_samplers =
+ pdevice->has_bindless_samplers ? UINT16_MAX :
+ (devinfo->gen >= 8 || devinfo->is_haswell) ? 128 : 16;
/* The moment we have anything bindless, claim a high per-stage limit */
const uint32_t max_per_stage =
@@ -1144,7 +1164,7 @@ void anv_GetPhysicalDeviceProperties(
.maxPerStageDescriptorSamplers = max_samplers,
.maxPerStageDescriptorUniformBuffers = 64,
.maxPerStageDescriptorStorageBuffers = max_ssbos,
- .maxPerStageDescriptorSampledImages = max_samplers,
+ .maxPerStageDescriptorSampledImages = max_textures,
.maxPerStageDescriptorStorageImages = MAX_IMAGES,
.maxPerStageDescriptorInputAttachments = 64,
.maxPerStageResources = max_per_stage,
@@ -1153,7 +1173,7 @@ void anv_GetPhysicalDeviceProperties(
.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
.maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
- .maxDescriptorSetSampledImages = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */
+ .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
.maxDescriptorSetStorageImages = 6 * MAX_IMAGES, /* number of stages * maxPerStageDescriptorStorageImages */
.maxDescriptorSetInputAttachments = 256,
.maxVertexInputAttributes = MAX_VBS,
@@ -3408,6 +3428,11 @@ void anv_DestroySampler(
if (!sampler)
return;
+ if (sampler->bindless_state.map) {
+ anv_state_pool_free(&device->dynamic_state_pool,
+ sampler->bindless_state);
+ }
+
vk_free2(&device->alloc, pAllocator, sampler);
}
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index ab0103cfcd4..800ed2ef3e2 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -760,39 +760,64 @@ lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
unsigned array_size =
state->layout->set[set].layout->binding[binding].array_size;
- nir_tex_src_type offset_src_type;
+ unsigned binding_offset;
if (deref_src_type == nir_tex_src_texture_deref) {
- offset_src_type = nir_tex_src_texture_offset;
- *base_index = state->set[set].surface_offsets[binding] + plane;
+ binding_offset = state->set[set].surface_offsets[binding];
} else {
assert(deref_src_type == nir_tex_src_sampler_deref);
- offset_src_type = nir_tex_src_sampler_offset;
- *base_index = state->set[set].sampler_offsets[binding] + plane;
+ binding_offset = state->set[set].sampler_offsets[binding];
}
+ nir_builder *b = &state->builder;
+
+ nir_tex_src_type offset_src_type;
nir_ssa_def *index = NULL;
- if (deref->deref_type != nir_deref_type_var) {
- assert(deref->deref_type == nir_deref_type_array);
+ if (binding_offset > MAX_BINDING_TABLE_SIZE) {
+ const unsigned plane_offset =
+ plane * sizeof(struct anv_sampled_image_descriptor);
- if (nir_src_is_const(deref->arr.index)) {
- unsigned arr_index = nir_src_as_uint(deref->arr.index);
- *base_index += MIN2(arr_index, array_size - 1);
+ nir_ssa_def *desc =
+ build_descriptor_load(deref, plane_offset, 2, 32, state);
+
+ if (deref_src_type == nir_tex_src_texture_deref) {
+ offset_src_type = nir_tex_src_texture_handle;
+ index = nir_channel(b, desc, 0);
} else {
- nir_builder *b = &state->builder;
-
- /* From VK_KHR_sampler_ycbcr_conversion:
- *
- * If sampler Y’CBCR conversion is enabled, the combined image
- * sampler must be indexed only by constant integral expressions when
- * aggregated into arrays in shader code, irrespective of the
- * shaderSampledImageArrayDynamicIndexing feature.
- */
- assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);
+ assert(deref_src_type == nir_tex_src_sampler_deref);
+ offset_src_type = nir_tex_src_sampler_handle;
+ index = nir_channel(b, desc, 1);
+ }
+ } else {
+ if (deref_src_type == nir_tex_src_texture_deref) {
+ offset_src_type = nir_tex_src_texture_offset;
+ } else {
+ assert(deref_src_type == nir_tex_src_sampler_deref);
+ offset_src_type = nir_tex_src_sampler_offset;
+ }
- index = nir_ssa_for_src(b, deref->arr.index, 1);
+ *base_index = binding_offset + plane;
- if (state->add_bounds_checks)
- index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
+ if (deref->deref_type != nir_deref_type_var) {
+ assert(deref->deref_type == nir_deref_type_array);
+
+ if (nir_src_is_const(deref->arr.index)) {
+ unsigned arr_index = nir_src_as_uint(deref->arr.index);
+ *base_index += MIN2(arr_index, array_size - 1);
+ } else {
+ /* From VK_KHR_sampler_ycbcr_conversion:
+ *
+ * If sampler Y’CBCR conversion is enabled, the combined image
+ * sampler must be indexed only by constant integral expressions
+ * when aggregated into arrays in shader code, irrespective of
+ * the shaderSampledImageArrayDynamicIndexing feature.
+ */
+ assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);
+
+ index = nir_ssa_for_src(b, deref->arr.index, 1);
+
+ if (state->add_bounds_checks)
+ index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
+ }
}
}
@@ -1062,6 +1087,10 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
anv_descriptor_requires_bindless(pdevice, binding, true)) {
/* If this descriptor doesn't fit in the binding table or if it
* requires bindless for some reason, flag it as bindless.
+ *
+ * We also make large sampler arrays bindless because we can avoid
+ * using indirect sends thanks to bindless samplers being packed
+ * less tightly than the sampler table.
*/
assert(anv_descriptor_supports_bindless(pdevice, binding, true));
state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index bb24ff1ae82..9f525d1e21a 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -953,6 +953,10 @@ struct anv_physical_device {
/** True if we can access buffers using A64 messages */
bool has_a64_buffer_access;
+ /** True if we can use bindless access for images */
+ bool has_bindless_images;
+ /** True if we can use bindless access for samplers */
+ bool has_bindless_samplers;
struct anv_device_extension_table supported_extensions;
@@ -1521,6 +1525,27 @@ struct anv_vue_header {
float PointWidth;
};
+/** Struct representing a sampled image descriptor
+ *
+ * This descriptor layout is used for sampled image, bare sampler, and
+ * combined image/sampler descriptors.  Multi-planar bindings store one of
+ * these per plane, back to back.
+ *
+ * The field order is significant: shaders load a descriptor as a
+ * two-component 32-bit vector and take the image handle from component 0
+ * and the sampler handle from component 1.
+ */
+struct anv_sampled_image_descriptor {
+ /** Bindless image handle
+ *
+ * This is expected to already be shifted such that the 20-bit
+ * SURFACE_STATE table index is in the top 20 bits, i.e. bits 31:12.
+ */
+ uint32_t image;
+
+ /** Bindless sampler handle
+ *
+ * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
+ * to the dynamic state base address.
+ */
+ uint32_t sampler;
+};
+
/** Struct representing a address/range descriptor
*
* The fields of this struct correspond directly to the data layout of
@@ -1547,6 +1572,8 @@ enum anv_descriptor_data {
ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
/** anv_address_range_descriptor with a buffer address and range */
ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5),
+ /** anv_sampled_image_descriptor with bindless image and sampler handles */
+ ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
};
struct anv_descriptor_set_binding_layout {
@@ -3454,6 +3481,11 @@ struct anv_sampler {
uint32_t state[3][4];
uint32_t n_planes;
struct anv_ycbcr_conversion *conversion;
+
+ /* Blob of sampler state data which is guaranteed to be 32-byte aligned
+ * and with a 32-byte stride for use as bindless samplers.
+ */
+ struct anv_state bindless_state;
};
struct anv_framebuffer {
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 3189585cbd3..1af36bced24 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -110,10 +110,18 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
sba.InstructionBuffersizeModifyEnable = true;
# endif
# if (GEN_GEN >= 9)
- sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { NULL, 0 };
+ if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
+ sba.BindlessSurfaceStateBaseAddress = (struct anv_address) {
+ .bo = device->surface_state_pool.block_pool.bo,
+ .offset = 0,
+ };
+ sba.BindlessSurfaceStateSize = (1 << 20) - 1;
+ } else {
+ sba.BindlessSurfaceStateBaseAddress = ANV_NULL_ADDRESS;
+ sba.BindlessSurfaceStateSize = 0;
+ }
sba.BindlessSurfaceStateMOCS = GENX(MOCS);
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
- sba.BindlessSurfaceStateSize = 0;
# endif
# if (GEN_GEN >= 10)
sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 };
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index 283cd8c501a..9276dc9470b 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -328,6 +328,8 @@ VkResult genX(CreateSampler)(
VkSampler* pSampler)
{
ANV_FROM_HANDLE(anv_device, device, _device);
+ const struct anv_physical_device *pdevice =
+ &device->instance->physicalDevice;
struct anv_sampler *sampler;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
@@ -383,6 +385,17 @@ VkResult genX(CreateSampler)(
}
}
+ if (pdevice->has_bindless_samplers) {
+ /* If we have bindless, allocate enough samplers. We allocate 32 bytes
+ * for each sampler instead of 16 bytes because we want all bindless
+ * samplers to be 32-byte aligned so we don't have to use indirect
+ * sampler messages on them.
+ */
+ sampler->bindless_state =
+ anv_state_pool_alloc(&device->dynamic_state_pool,
+ sampler->n_planes * 32, 32);
+ }
+
for (unsigned p = 0; p < sampler->n_planes; p++) {
const bool plane_has_chroma =
sampler->conversion && sampler->conversion->format->planes[p].has_chroma;
@@ -452,6 +465,11 @@ VkResult genX(CreateSampler)(
};
GENX(SAMPLER_STATE_pack)(NULL, sampler->state[p], &sampler_state);
+
+ if (sampler->bindless_state.map) {
+ memcpy(sampler->bindless_state.map + p * 32,
+ sampler->state[p], GENX(SAMPLER_STATE_length) * 4);
+ }
}
*pSampler = anv_sampler_to_handle(sampler);