diff options
author | Jason Ekstrand <[email protected]> | 2019-01-09 16:04:22 -0600 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2019-04-19 19:56:42 +0000 |
commit | 79fb0d27f3ab41fec88acbe24bc3163a42c0715c (patch) | |
tree | 053d3bba13cc24b8711c43470b25546b9508eec1 /src/intel/vulkan/anv_device.c | |
parent | 3cf78ec2bdc22833082d026d59ccb28d79b07f6f (diff) |
anv: Implement SSBO bindings with GPU addresses in the descriptor BO
This commit adds a new way for ANV to do SSBO bindings by just passing a
GPU address in through the descriptor buffer and using the A64 messages
to access the GPU address directly. This means that our variable
pointers are now "real" pointers instead of a vec2(BTI, offset) pair.
This carries a few advantages:
1. It lets us support a virtually unbounded number of SSBO bindings.
2. It lets us implement VK_KHR_shader_atomic_int64 which we couldn't
implement before because those atomic messages are only available
in the bindless A64 form.
3. It's way better than messing around with bindless handles for SSBOs
which is the only other option for VK_EXT_descriptor_indexing.
4. It's more future looking, maybe? At the least, this is what NVIDIA
does (they don't have binding based SSBOs at all). This doesn't a
priori mean it's better, it just means it's probably not terrible.
The big disadvantage, of course, is that we have to start doing our own
bounds checking for robustBufferAccess again and have to push in dynamic
offsets.
Reviewed-by: Lionel Landwerlin <[email protected]>
Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
Diffstat (limited to 'src/intel/vulkan/anv_device.c')
-rw-r--r-- | src/intel/vulkan/anv_device.c | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 8c60b917050..de56926d935 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -278,6 +278,8 @@ anv_physical_device_init_uuids(struct anv_physical_device *device) sizeof(device->chipset_id)); _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless, sizeof(device->always_use_bindless)); + _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access, + sizeof(device->has_a64_buffer_access)); _mesa_sha1_final(&sha1_ctx, sha1); memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE); @@ -1103,9 +1105,15 @@ void anv_GetPhysicalDeviceProperties( const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ? (1ul << 30) : (1ul << 27); + const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64; const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ? 128 : 16; + /* The moment we have anything bindless, claim a high per-stage limit */ + const uint32_t max_per_stage = + pdevice->has_a64_buffer_access ? 
UINT32_MAX : + MAX_BINDING_TABLE_SIZE - MAX_RTS; + VkSampleCountFlags sample_counts = isl_device_get_sample_counts(&pdevice->isl_dev); @@ -1127,15 +1135,15 @@ void anv_GetPhysicalDeviceProperties( .maxBoundDescriptorSets = MAX_SETS, .maxPerStageDescriptorSamplers = max_samplers, .maxPerStageDescriptorUniformBuffers = 64, - .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorStorageBuffers = max_ssbos, .maxPerStageDescriptorSampledImages = max_samplers, .maxPerStageDescriptorStorageImages = MAX_IMAGES, .maxPerStageDescriptorInputAttachments = 64, - .maxPerStageResources = MAX_BINDING_TABLE_SIZE - MAX_RTS, + .maxPerStageResources = max_per_stage, .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */ .maxDescriptorSetUniformBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorUniformBuffers */ .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, - .maxDescriptorSetStorageBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorStorageBuffers */ + .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */ .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, .maxDescriptorSetSampledImages = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */ .maxDescriptorSetStorageImages = 6 * MAX_IMAGES, /* number of stages * maxPerStageDescriptorStorageImages */ |