Diffstat:
 -rw-r--r--  src/intel/vulkan/anv_cmd_buffer.c                 |  37
 -rw-r--r--  src/intel/vulkan/anv_descriptor_set.c             |  37
 -rw-r--r--  src/intel/vulkan/anv_device.c                     |  14
 -rw-r--r--  src/intel/vulkan/anv_nir_apply_pipeline_layout.c  | 242
 -rw-r--r--  src/intel/vulkan/anv_pipeline.c                   |  32
 -rw-r--r--  src/intel/vulkan/anv_private.h                    |  20
 6 files changed, 347 insertions(+), 35 deletions(-)
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 1b34644a434..981c071fc23 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -594,6 +594,14 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
*dynamic_offsets += set_layout->dynamic_offset_count;
*dynamic_offset_count -= set_layout->dynamic_offset_count;
+
+ if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) {
+ cmd_buffer->state.push_constants_dirty |=
+ VK_SHADER_STAGE_COMPUTE_BIT;
+ } else {
+ cmd_buffer->state.push_constants_dirty |=
+ VK_SHADER_STAGE_ALL_GRAPHICS;
+ }
}
}
@@ -739,7 +747,8 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
}
static uint32_t
-anv_push_constant_value(struct anv_push_constants *data, uint32_t param)
+anv_push_constant_value(const struct anv_cmd_pipeline_state *state,
+ const struct anv_push_constants *data, uint32_t param)
{
if (BRW_PARAM_IS_BUILTIN(param)) {
switch (param) {
@@ -754,20 +763,28 @@ anv_push_constant_value(struct anv_push_constants *data, uint32_t param)
default:
unreachable("Invalid param builtin");
}
- } else {
+ } else if (ANV_PARAM_IS_PUSH(param)) {
uint32_t offset = ANV_PARAM_PUSH_OFFSET(param);
assert(offset % sizeof(uint32_t) == 0);
if (offset < data->size)
return *(uint32_t *)((uint8_t *)data + offset);
else
return 0;
+ } else if (ANV_PARAM_IS_DYN_OFFSET(param)) {
+ unsigned idx = ANV_PARAM_DYN_OFFSET_IDX(param);
+ assert(idx < MAX_DYNAMIC_BUFFERS);
+ return state->dynamic_offsets[idx];
}
+
+ assert(!"Invalid param");
+ return 0;
}
struct anv_state
anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage)
{
+ struct anv_cmd_pipeline_state *pipeline_state = &cmd_buffer->state.gfx.base;
struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
/* If we don't have this stage, bail. */
@@ -780,7 +797,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
pipeline->shaders[stage]->prog_data;
/* If we don't actually have any push constants, bail. */
- if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
+ if (prog_data == NULL || prog_data->nr_params == 0)
return (struct anv_state) { .offset = 0 };
struct anv_state state =
@@ -790,8 +807,10 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
/* Walk through the param array and fill the buffer with data */
uint32_t *u32_map = state.map;
- for (unsigned i = 0; i < prog_data->nr_params; i++)
- u32_map[i] = anv_push_constant_value(data, prog_data->param[i]);
+ for (unsigned i = 0; i < prog_data->nr_params; i++) {
+ u32_map[i] = anv_push_constant_value(pipeline_state, data,
+ prog_data->param[i]);
+ }
return state;
}
@@ -799,6 +818,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
+ struct anv_cmd_pipeline_state *pipeline_state = &cmd_buffer->state.compute.base;
struct anv_push_constants *data =
cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE];
struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
@@ -826,7 +846,8 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
i < cs_prog_data->push.cross_thread.dwords;
i++) {
assert(prog_data->param[i] != BRW_PARAM_BUILTIN_SUBGROUP_ID);
- u32_map[i] = anv_push_constant_value(data, prog_data->param[i]);
+ u32_map[i] = anv_push_constant_value(pipeline_state, data,
+ prog_data->param[i]);
}
}
@@ -840,8 +861,8 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
if (prog_data->param[src] == BRW_PARAM_BUILTIN_SUBGROUP_ID) {
u32_map[dst] = t;
} else {
- u32_map[dst] =
- anv_push_constant_value(data, prog_data->param[src]);
+ u32_map[dst] = anv_push_constant_value(pipeline_state, data,
+ prog_data->param[src]);
}
}
}
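
For illustration, a minimal standalone sketch of how the param walk above resolves mixed push-constant and dynamic-offset params; the macro definitions mirror the ANV_PARAM_* additions to anv_private.h at the end of this patch, but the surrounding program is hypothetical, not driver code:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ANV_PARAM_PUSH(offset)          ((1 << 16) | (uint32_t)(offset))
#define ANV_PARAM_IS_PUSH(param)        ((uint32_t)(param) >> 16 == 1)
#define ANV_PARAM_PUSH_OFFSET(param)    ((param) & 0xffff)
#define ANV_PARAM_DYN_OFFSET(offset)    ((2 << 16) | (uint32_t)(offset))
#define ANV_PARAM_IS_DYN_OFFSET(param)  ((uint32_t)(param) >> 16 == 2)
#define ANV_PARAM_DYN_OFFSET_IDX(param) ((param) & 0xffff)

int main(void)
{
   /* Stand-ins for the client push-constant data and for the dynamic
    * offsets captured into the pipeline state at descriptor-set bind time.
    */
   uint32_t push_data[4] = { 10, 20, 30, 40 };
   uint32_t dynamic_offsets[4] = { 0, 256, 512, 0 };

   /* A param array as the compiler might emit it: two push dwords followed
    * by one dynamic buffer offset.
    */
   uint32_t params[] = { ANV_PARAM_PUSH(0), ANV_PARAM_PUSH(4),
                         ANV_PARAM_DYN_OFFSET(2) };

   for (unsigned i = 0; i < 3; i++) {
      uint32_t value;
      if (ANV_PARAM_IS_PUSH(params[i]))
         value = push_data[ANV_PARAM_PUSH_OFFSET(params[i]) / 4];
      else if (ANV_PARAM_IS_DYN_OFFSET(params[i]))
         value = dynamic_offsets[ANV_PARAM_DYN_OFFSET_IDX(params[i])];
      else
         value = 0;
      printf("u32_map[%u] = %u\n", i, value);   /* prints 10, 20, 512 */
   }
   return 0;
}
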
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index 90a02997a8d..85915cfb9d0 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -84,6 +84,14 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
unreachable("Unsupported descriptor type");
}
+ /* On gen8 and above when we have softpin enabled, we also need to push
+ * SSBO address ranges so that we can use A64 messages in the shader.
+ */
+ if (device->has_a64_buffer_access &&
+ (type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+ type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC))
+ data |= ANV_DESCRIPTOR_ADDRESS_RANGE;
+
return data;
}
@@ -95,6 +103,9 @@ anv_descriptor_data_size(enum anv_descriptor_data data)
if (data & ANV_DESCRIPTOR_IMAGE_PARAM)
size += BRW_IMAGE_PARAM_SIZE * 4;
+ if (data & ANV_DESCRIPTOR_ADDRESS_RANGE)
+ size += sizeof(struct anv_address_range_descriptor);
+
return size;
}
@@ -130,6 +141,11 @@ anv_descriptor_data_supports_bindless(const struct anv_physical_device *pdevice,
enum anv_descriptor_data data,
bool sampler)
{
+ if (data & ANV_DESCRIPTOR_ADDRESS_RANGE) {
+ assert(pdevice->has_a64_buffer_access);
+ return true;
+ }
+
return false;
}
@@ -1077,6 +1093,9 @@ anv_descriptor_set_write_buffer(struct anv_device *device,
assert(type == bind_layout->type);
+ struct anv_address bind_addr = anv_address_add(buffer->address, offset);
+ uint64_t bind_range = anv_buffer_get_range(buffer, offset, range);
+
if (type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
*desc = (struct anv_descriptor) {
@@ -1091,8 +1110,8 @@ anv_descriptor_set_write_buffer(struct anv_device *device,
&set->buffer_views[bind_layout->buffer_view_index + element];
bview->format = anv_isl_format_for_descriptor_type(type);
- bview->range = anv_buffer_get_range(buffer, offset, range);
- bview->address = anv_address_add(buffer->address, offset);
+ bview->range = bind_range;
+ bview->address = bind_addr;
/* If we're writing descriptors through a push command, we need to
* allocate the surface state from the command buffer. Otherwise it will
@@ -1102,14 +1121,24 @@ anv_descriptor_set_write_buffer(struct anv_device *device,
bview->surface_state = anv_state_stream_alloc(alloc_stream, 64, 64);
anv_fill_buffer_surface_state(device, bview->surface_state,
- bview->format,
- bview->address, bview->range, 1);
+ bview->format, bind_addr, bind_range, 1);
*desc = (struct anv_descriptor) {
.type = type,
.buffer_view = bview,
};
}
+
+ void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
+ element * anv_descriptor_size(bind_layout);
+
+ if (bind_layout->data & ANV_DESCRIPTOR_ADDRESS_RANGE) {
+ struct anv_address_range_descriptor desc = {
+ .address = anv_address_physical(bind_addr),
+ .range = bind_range,
+ };
+ memcpy(desc_map, &desc, sizeof(desc));
+ }
}
void
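
As a rough sketch of what anv_descriptor_set_write_buffer stores for an SSBO when ANV_DESCRIPTOR_ADDRESS_RANGE is set: the struct layout is copied from the anv_private.h hunk below, but the buffer, offsets, and addresses here are made up for illustration:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Same layout as struct anv_address_range_descriptor: a 64-bit GPU address,
 * a 32-bit range, and a zero dword so the record matches the
 * nir_address_format_64bit_bounded_global vector the shader later loads.
 */
struct address_range_desc {
   uint64_t address;
   uint32_t range;
   uint32_t zero;
};

int main(void)
{
   uint8_t desc_mem[256] = { 0 };        /* pretend descriptor buffer map */
   uint32_t descriptor_offset = 32;      /* bind_layout->descriptor_offset */
   unsigned element = 1;                 /* array element being written */

   struct address_range_desc desc = {
      .address = 0x0000a00000001000ull,  /* anv_address_physical(bind_addr) */
      .range   = 4096,                   /* anv_buffer_get_range(...) */
      .zero    = 0,
   };

   void *desc_map = desc_mem + descriptor_offset + element * sizeof(desc);
   memcpy(desc_map, &desc, sizeof(desc));

   _Static_assert(sizeof(struct address_range_desc) == 16, "16-byte record");
   assert(offsetof(struct address_range_desc, zero) == 12);
   return 0;
}
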
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 8c60b917050..de56926d935 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -278,6 +278,8 @@ anv_physical_device_init_uuids(struct anv_physical_device *device)
sizeof(device->chipset_id));
_mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
sizeof(device->always_use_bindless));
+ _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
+ sizeof(device->has_a64_buffer_access));
_mesa_sha1_final(&sha1_ctx, sha1);
memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
@@ -1103,9 +1105,15 @@ void anv_GetPhysicalDeviceProperties(
const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
(1ul << 30) : (1ul << 27);
+ const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
128 : 16;
+ /* The moment we have anything bindless, claim a high per-stage limit */
+ const uint32_t max_per_stage =
+ pdevice->has_a64_buffer_access ? UINT32_MAX :
+ MAX_BINDING_TABLE_SIZE - MAX_RTS;
+
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&pdevice->isl_dev);
@@ -1127,15 +1135,15 @@ void anv_GetPhysicalDeviceProperties(
.maxBoundDescriptorSets = MAX_SETS,
.maxPerStageDescriptorSamplers = max_samplers,
.maxPerStageDescriptorUniformBuffers = 64,
- .maxPerStageDescriptorStorageBuffers = 64,
+ .maxPerStageDescriptorStorageBuffers = max_ssbos,
.maxPerStageDescriptorSampledImages = max_samplers,
.maxPerStageDescriptorStorageImages = MAX_IMAGES,
.maxPerStageDescriptorInputAttachments = 64,
- .maxPerStageResources = MAX_BINDING_TABLE_SIZE - MAX_RTS,
+ .maxPerStageResources = max_per_stage,
.maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
.maxDescriptorSetUniformBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorUniformBuffers */
.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
- .maxDescriptorSetStorageBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorStorageBuffers */
+ .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
.maxDescriptorSetSampledImages = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */
.maxDescriptorSetStorageImages = 6 * MAX_IMAGES, /* number of stages * maxPerStageDescriptorStorageImages */
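
From the application side the bump is visible through the usual limits query; a minimal usage sketch (any valid VkPhysicalDevice will do):

#include <stdio.h>
#include <vulkan/vulkan.h>

/* With this patch, softpin-capable gen8+ devices report UINT16_MAX for the
 * per-stage SSBO limit instead of 64.
 */
void print_ssbo_limits(VkPhysicalDevice physical_device)
{
   VkPhysicalDeviceProperties props;
   vkGetPhysicalDeviceProperties(physical_device, &props);

   printf("maxPerStageDescriptorStorageBuffers = %u\n",
          props.limits.maxPerStageDescriptorStorageBuffers);
   printf("maxDescriptorSetStorageBuffers      = %u\n",
          props.limits.maxDescriptorSetStorageBuffers);
   printf("maxPerStageResources                = %u\n",
          props.limits.maxPerStageResources);
}
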
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 7abc27be103..356a56e47bb 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -45,6 +45,8 @@ struct apply_pipeline_layout_state {
/* Place to flag lowered instructions so we don't lower them twice */
struct set *lowered_instrs;
+ int dynamic_offset_uniform_start;
+
bool uses_constants;
uint8_t constants_offset;
struct {
@@ -159,7 +161,12 @@ find_descriptor_for_index_src(nir_src src,
if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
return false;
- return true;
+ uint32_t set = nir_intrinsic_desc_set(intrin);
+ uint32_t binding = nir_intrinsic_binding(intrin);
+ uint32_t surface_index = state->set[set].surface_offsets[binding];
+
+ /* Only lower to a BTI message if we have a valid binding table index. */
+ return surface_index < MAX_BINDING_TABLE_SIZE;
}
static bool
@@ -327,6 +334,7 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
uint32_t set = nir_intrinsic_desc_set(intrin);
uint32_t binding = nir_intrinsic_binding(intrin);
+ const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
const struct anv_descriptor_set_binding_layout *bind_layout =
&state->layout->set[set].layout->binding[binding];
@@ -339,14 +347,55 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));
nir_ssa_def *index;
- if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
+ if (state->pdevice->has_a64_buffer_access &&
+ (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+ desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
+ /* We store the descriptor offset as 16.8.8 where the top 16 bits are
+ * the offset into the descriptor set, the next 8 are the binding table
+ * index of the descriptor buffer, and the bottom 8 bits are the offset
+ * (in bytes) into the dynamic offset table.
+ */
+ assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
+ uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
+ if (bind_layout->dynamic_offset_index >= 0) {
+ dynamic_offset_index =
+ state->layout->set[set].dynamic_offset_start +
+ bind_layout->dynamic_offset_index;
+ }
+
+ const uint32_t desc_offset =
+ bind_layout->descriptor_offset << 16 |
+ (uint32_t)state->set[set].desc_offset << 8 |
+ dynamic_offset_index;
+
+ if (state->add_bounds_checks) {
+ /* We're using nir_address_format_64bit_bounded_global */
+ assert(intrin->dest.ssa.num_components == 4);
+ assert(intrin->dest.ssa.bit_size == 32);
+ index = nir_vec4(b, nir_imm_int(b, desc_offset),
+ nir_ssa_for_src(b, intrin->src[0], 1),
+ nir_imm_int(b, array_size - 1),
+ nir_ssa_undef(b, 1, 32));
+ } else {
+ /* We're using nir_address_format_64bit_global */
+ assert(intrin->dest.ssa.num_components == 1);
+ assert(intrin->dest.ssa.bit_size == 64);
+ index = nir_pack_64_2x32_split(b, nir_imm_int(b, desc_offset),
+ nir_ssa_for_src(b, intrin->src[0], 1));
+ }
+ } else if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
/* This is an inline uniform block. Just reference the descriptor set
- * and use the descriptor offset as the base.
+ * and use the descriptor offset as the base. Inline uniforms always
+ * use nir_address_format_32bit_index_offset
*/
+ assert(intrin->dest.ssa.num_components == 2);
+ assert(intrin->dest.ssa.bit_size == 32);
index = nir_imm_ivec2(b, state->set[set].desc_offset,
bind_layout->descriptor_offset);
} else {
/* We're using nir_address_format_32bit_index_offset */
+ assert(intrin->dest.ssa.num_components == 2);
+ assert(intrin->dest.ssa.bit_size == 32);
index = nir_vec2(b, nir_iadd_imm(b, array_index, surface_index),
nir_imm_int(b, 0));
}
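
In the non-robust A64 path above, the value fed to later intrinsics is not yet an address: it is the (desc_offset, array_index) pair packed into 64 bits, which build_ssbo_descriptor_load (added further down) splits apart again. A plain-C sketch of that pack/unpack, using illustrative values:

#include <assert.h>
#include <stdint.h>

/* C equivalent of nir_pack_64_2x32_split(lo, hi): low dword first. */
static uint64_t pack_64_2x32_split(uint32_t lo, uint32_t hi)
{
   return (uint64_t)hi << 32 | lo;
}

int main(void)
{
   uint32_t desc_offset = 96u << 16 | 3u << 8 | 0xff; /* 16.8.8 encoding */
   uint32_t array_index = 2;

   uint64_t fake_ptr = pack_64_2x32_split(desc_offset, array_index);

   /* nir_unpack_64_2x32_split_x / _y recover the two halves. */
   assert((uint32_t)fake_ptr == desc_offset);
   assert((uint32_t)(fake_ptr >> 32) == array_index);
   return 0;
}
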
@@ -364,6 +413,8 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
b->cursor = nir_before_instr(&intrin->instr);
+ const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+
/* For us, the resource indices are just indices into the binding table and
* array elements are sequential. A resource_reindex just turns into an
* add of the two indices.
@@ -372,15 +423,81 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
nir_ssa_def *old_index = intrin->src[0].ssa;
nir_ssa_def *offset = intrin->src[1].ssa;
- nir_ssa_def *new_index =
- nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset),
- nir_channel(b, old_index, 1));
+ nir_ssa_def *new_index;
+ if (state->pdevice->has_a64_buffer_access &&
+ (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+ desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
+ if (state->add_bounds_checks) {
+ /* We're using nir_address_format_64bit_bounded_global */
+ assert(intrin->dest.ssa.num_components == 4);
+ assert(intrin->dest.ssa.bit_size == 32);
+ new_index = nir_vec4(b, nir_channel(b, old_index, 0),
+ nir_iadd(b, nir_channel(b, old_index, 1),
+ offset),
+ nir_channel(b, old_index, 2),
+ nir_ssa_undef(b, 1, 32));
+ } else {
+ /* We're using nir_address_format_64bit_global */
+ assert(intrin->dest.ssa.num_components == 1);
+ assert(intrin->dest.ssa.bit_size == 64);
+ nir_ssa_def *base = nir_unpack_64_2x32_split_x(b, old_index);
+ nir_ssa_def *arr_idx = nir_unpack_64_2x32_split_y(b, old_index);
+ new_index = nir_pack_64_2x32_split(b, base, nir_iadd(b, arr_idx, offset));
+ }
+ } else {
+ /* We're using nir_address_format_32bit_index_offset */
+ assert(intrin->dest.ssa.num_components == 2);
+ assert(intrin->dest.ssa.bit_size == 32);
+ new_index = nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset),
+ nir_channel(b, old_index, 1));
+ }
assert(intrin->dest.is_ssa);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index));
nir_instr_remove(&intrin->instr);
}
+static nir_ssa_def *
+build_ssbo_descriptor_load(const VkDescriptorType desc_type,
+ nir_ssa_def *index,
+ struct apply_pipeline_layout_state *state)
+{
+ nir_builder *b = &state->builder;
+
+ nir_ssa_def *desc_offset, *array_index;
+ if (state->add_bounds_checks) {
+ /* We're using nir_address_format_64bit_bounded_global */
+ desc_offset = nir_channel(b, index, 0);
+ array_index = nir_umin(b, nir_channel(b, index, 1),
+ nir_channel(b, index, 2));
+ } else {
+ desc_offset = nir_unpack_64_2x32_split_x(b, index);
+ array_index = nir_unpack_64_2x32_split_y(b, index);
+ }
+
+ /* The desc_offset is actually 16.8.8 */
+ nir_ssa_def *desc_buffer_index =
+ nir_extract_u8(b, desc_offset, nir_imm_int(b, 1));
+ nir_ssa_def *desc_offset_base =
+ nir_extract_u16(b, desc_offset, nir_imm_int(b, 1));
+
+ /* Compute the actual descriptor offset */
+ const unsigned descriptor_size =
+ anv_descriptor_type_size(state->pdevice, desc_type);
+ desc_offset = nir_iadd(b, desc_offset_base,
+ nir_imul_imm(b, array_index, descriptor_size));
+
+ nir_intrinsic_instr *desc_load =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
+ desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
+ desc_load->src[1] = nir_src_for_ssa(desc_offset);
+ desc_load->num_components = 4;
+ nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL);
+ nir_builder_instr_insert(b, &desc_load->instr);
+
+ return &desc_load->dest.ssa;
+}
+
static void
lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
struct apply_pipeline_layout_state *state)
@@ -389,12 +506,84 @@ lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
b->cursor = nir_before_instr(&intrin->instr);
- /* We follow the nir_address_format_32bit_index_offset model */
+ const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+
assert(intrin->src[0].is_ssa);
nir_ssa_def *index = intrin->src[0].ssa;
+ nir_ssa_def *desc;
+ if (state->pdevice->has_a64_buffer_access &&
+ (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+ desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
+ desc = build_ssbo_descriptor_load(desc_type, index, state);
+
+ /* We want nir_address_format_64bit_global */
+ if (!state->add_bounds_checks)
+ desc = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
+
+ if (state->dynamic_offset_uniform_start >= 0) {
+ /* This shader has dynamic offsets and we have no way of knowing
+ * (save from the dynamic offset base index) if this buffer has a
+ * dynamic offset.
+ */
+ nir_ssa_def *desc_offset, *array_index;
+ if (state->add_bounds_checks) {
+ /* We're using nir_address_format_64bit_bounded_global */
+ desc_offset = nir_channel(b, index, 0);
+ array_index = nir_umin(b, nir_channel(b, index, 1),
+ nir_channel(b, index, 2));
+ } else {
+ desc_offset = nir_unpack_64_2x32_split_x(b, index);
+ array_index = nir_unpack_64_2x32_split_y(b, index);
+ }
+
+ nir_ssa_def *dyn_offset_base =
+ nir_extract_u8(b, desc_offset, nir_imm_int(b, 0));
+ nir_ssa_def *dyn_offset_idx =
+ nir_iadd(b, dyn_offset_base, array_index);
+ if (state->add_bounds_checks) {
+ dyn_offset_idx = nir_umin(b, dyn_offset_idx,
+ nir_imm_int(b, MAX_DYNAMIC_BUFFERS));
+ }
+
+ nir_intrinsic_instr *dyn_load =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+ nir_intrinsic_set_base(dyn_load, state->dynamic_offset_uniform_start);
+ nir_intrinsic_set_range(dyn_load, MAX_DYNAMIC_BUFFERS * 4);
+ dyn_load->src[0] = nir_src_for_ssa(nir_imul_imm(b, dyn_offset_idx, 4));
+ dyn_load->num_components = 1;
+ nir_ssa_dest_init(&dyn_load->instr, &dyn_load->dest, 1, 32, NULL);
+ nir_builder_instr_insert(b, &dyn_load->instr);
+
+ nir_ssa_def *dynamic_offset =
+ nir_bcsel(b, nir_ieq(b, dyn_offset_base, nir_imm_int(b, 0xff)),
+ nir_imm_int(b, 0), &dyn_load->dest.ssa);
+
+ if (state->add_bounds_checks) {
+ /* The dynamic offset gets added to the base pointer so that we
+ * have a sliding window range.
+ *
+ * We're using nir_address_format_64bit_bounded_global.
+ */
+ nir_ssa_def *base_ptr =
+ nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
+ base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
+ desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
+ nir_unpack_64_2x32_split_y(b, base_ptr),
+ nir_channel(b, desc, 2),
+ nir_channel(b, desc, 3));
+ } else {
+ /* We're using nir_address_format_64bit_global */
+ desc = nir_iadd(b, desc, nir_u2u64(b, dynamic_offset));
+ }
+ }
+ } else {
+ /* We follow the nir_address_format_32bit_index_offset model */
+ desc = index;
+ }
+
assert(intrin->dest.is_ssa);
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index));
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
nir_instr_remove(&intrin->instr);
}
@@ -409,15 +598,24 @@ lower_get_buffer_size(nir_intrinsic_instr *intrin,
b->cursor = nir_before_instr(&intrin->instr);
+ const VkDescriptorType desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+
assert(intrin->src[0].is_ssa);
nir_ssa_def *index = intrin->src[0].ssa;
- /* We're following the nir_address_format_32bit_index_offset model so the
- * binding table index is the first component of the address. The
- * back-end wants a scalar binding table index source.
- */
- nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
- nir_src_for_ssa(nir_channel(b, index, 0)));
+ if (state->pdevice->has_a64_buffer_access) {
+ nir_ssa_def *desc = build_ssbo_descriptor_load(desc_type, index, state);
+ nir_ssa_def *size = nir_channel(b, desc, 2);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(size));
+ nir_instr_remove(&intrin->instr);
+ } else {
+ /* We're following the nir_address_format_32bit_index_offset model so
+ * the binding table index is the first component of the address. The
+ * back-end wants a scalar binding table index source.
+ */
+ nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+ nir_src_for_ssa(nir_channel(b, index, 0)));
+ }
}
static nir_ssa_def *
@@ -724,6 +922,7 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
.layout = layout,
.add_bounds_checks = robust_buffer_access,
.lowered_instrs = _mesa_pointer_set_create(mem_ctx),
+ .dynamic_offset_uniform_start = -1,
};
for (unsigned s = 0; s < layout->num_sets; s++) {
@@ -813,11 +1012,16 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
qsort(infos, used_binding_count, sizeof(struct binding_info),
compare_binding_infos);
+ bool have_dynamic_buffers = false;
+
for (unsigned i = 0; i < used_binding_count; i++) {
unsigned set = infos[i].set, b = infos[i].binding;
struct anv_descriptor_set_binding_layout *binding =
&layout->set[set].layout->binding[b];
+ if (binding->dynamic_offset_index >= 0)
+ have_dynamic_buffers = true;
+
const uint32_t array_size = binding->array_size;
if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
@@ -874,6 +1078,16 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
}
}
+ if (have_dynamic_buffers) {
+ state.dynamic_offset_uniform_start = shader->num_uniforms;
+ uint32_t *param = brw_stage_prog_data_add_params(prog_data,
+ MAX_DYNAMIC_BUFFERS);
+ for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
+ param[i] = ANV_PARAM_DYN_OFFSET(i);
+ shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 4;
+ assert(shader->num_uniforms == prog_data->nr_params * 4);
+ }
+
nir_foreach_variable(var, &shader->uniforms) {
const struct glsl_type *glsl_type = glsl_without_array(var->type);
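
To make the 16.8.8 encoding used throughout this file concrete, here is a standalone sketch of the pack done in lower_res_index_intrinsic and the unpack done in build_ssbo_descriptor_load, with the NIR extract ops replaced by the equivalent shifts and masks (values are illustrative):

#include <assert.h>
#include <stdint.h>

/* [31:16] offset of the binding within the descriptor set,
 * [15: 8] binding table index of the descriptor buffer,
 * [ 7: 0] index into the dynamic offset table (0xff = no dynamic offset).
 */
static uint32_t
pack_desc_offset(uint32_t set_offset, uint32_t desc_buffer_index,
                 uint32_t dyn_offset_index)
{
   assert(set_offset <= 0xffff);
   assert(desc_buffer_index <= 0xff && dyn_offset_index <= 0xff);
   return set_offset << 16 | desc_buffer_index << 8 | dyn_offset_index;
}

int main(void)
{
   uint32_t packed = pack_desc_offset(96, 3, 0xff);

   uint32_t set_offset        = packed >> 16;         /* nir_extract_u16(x, 1) */
   uint32_t desc_buffer_index = (packed >> 8) & 0xff; /* nir_extract_u8(x, 1)  */
   uint32_t dyn_offset_base   = packed & 0xff;        /* nir_extract_u8(x, 0)  */

   assert(set_offset == 96);
   assert(desc_buffer_index == 3);
   assert(dyn_offset_base == 0xff);
   return 0;
}
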
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 2dd60f2dd2c..b0ed2187376 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -166,12 +166,20 @@ anv_shader_compile_to_nir(struct anv_device *device,
.variable_pointers = true,
},
.ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
- .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
.phys_ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT64, 1),
.push_const_ptr_type = glsl_uint_type(),
.shared_ptr_type = glsl_uint_type(),
};
+ if (pdevice->has_a64_buffer_access) {
+ if (device->robust_buffer_access)
+ spirv_options.ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 4);
+ else
+ spirv_options.ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT64, 1);
+ } else {
+ spirv_options.ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2);
+ }
+
nir_function *entry_point =
spirv_to_nir(spirv, module->size / 4,
spec_entries, num_spec_entries,
@@ -553,8 +561,9 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
struct anv_pipeline_stage *stage,
struct anv_pipeline_layout *layout)
{
- const struct brw_compiler *compiler =
- pipeline->device->instance->physicalDevice.compiler;
+ const struct anv_physical_device *pdevice =
+ &pipeline->device->instance->physicalDevice;
+ const struct brw_compiler *compiler = pdevice->compiler;
struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
nir_shader *nir = stage->nir;
@@ -607,15 +616,26 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
if (layout) {
- anv_nir_apply_pipeline_layout(&pipeline->device->instance->physicalDevice,
+ anv_nir_apply_pipeline_layout(pdevice,
pipeline->device->robust_buffer_access,
layout, nir, prog_data,
&stage->bind_map);
- NIR_PASS_V(nir, nir_lower_explicit_io,
- nir_var_mem_ubo | nir_var_mem_ssbo,
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
nir_address_format_32bit_index_offset);
+ nir_address_format ssbo_address_format;
+ if (pdevice->has_a64_buffer_access) {
+ if (pipeline->device->robust_buffer_access)
+ ssbo_address_format = nir_address_format_64bit_bounded_global;
+ else
+ ssbo_address_format = nir_address_format_64bit_global;
+ } else {
+ ssbo_address_format = nir_address_format_32bit_index_offset;
+ }
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
+ ssbo_address_format);
+
NIR_PASS_V(nir, nir_opt_constant_folding);
}
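
The SSBO address-format choice above boils down to one small decision; a sketch of it as a helper (the function itself is hypothetical, but the nir_address_format values and conditions match the hunk above; assumes compilation within the Mesa tree for nir.h):

#include <stdbool.h>
#include "nir.h"

/* A64 buffer access plus robustBufferAccess wants the bounded vec4 form
 * (addr_lo, addr_hi, size, offset); A64 without robustness is a plain
 * 64-bit pointer; everything else stays on binding-table index + offset.
 */
static nir_address_format
choose_ssbo_address_format(bool has_a64_buffer_access, bool robust_buffer_access)
{
   if (!has_a64_buffer_access)
      return nir_address_format_32bit_index_offset;

   return robust_buffer_access ? nir_address_format_64bit_bounded_global
                               : nir_address_format_64bit_global;
}
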
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index ac63ab8b3be..9c747fa019c 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1521,6 +1521,19 @@ struct anv_vue_header {
float PointWidth;
};
+/** Struct representing an address/range descriptor
+ *
+ * The fields of this struct correspond directly to the data layout of
+ * nir_address_format_64bit_bounded_global addresses. The last field is the
+ * offset in the NIR address, and it must be zero so that loading the
+ * descriptor yields a pointer to the start of the range.
+ */
+struct anv_address_range_descriptor {
+ uint64_t address;
+ uint32_t range;
+ uint32_t zero;
+};
+
enum anv_descriptor_data {
/** The descriptor contains a BTI reference to a surface state */
ANV_DESCRIPTOR_SURFACE_STATE = (1 << 0),
@@ -1532,6 +1545,8 @@ enum anv_descriptor_data {
ANV_DESCRIPTOR_IMAGE_PARAM = (1 << 3),
/** The descriptor contains auxiliary image layout data */
ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
+ /** anv_address_range_descriptor with a buffer address and range */
+ ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5),
};
struct anv_descriptor_set_binding_layout {
@@ -2086,8 +2101,13 @@ struct anv_xfb_binding {
};
#define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset))
+#define ANV_PARAM_IS_PUSH(param) ((uint32_t)(param) >> 16 == 1)
#define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff)
+#define ANV_PARAM_DYN_OFFSET(offset) ((2 << 16) | (uint32_t)(offset))
+#define ANV_PARAM_IS_DYN_OFFSET(param) ((uint32_t)(param) >> 16 == 2)
+#define ANV_PARAM_DYN_OFFSET_IDX(param) ((param) & 0xffff)
+
struct anv_push_constants {
/* Current allocated size of this push constants data structure.
* Because a decent chunk of it may not be used (images on SKL, for