summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp31
-rw-r--r--src/intel/isl/isl_surface_state.c22
-rw-r--r--src/intel/vulkan/anv_device.c11
3 files changed, 62 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 8efec34cc9d..d8300589a5e 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4290,7 +4290,36 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
inst->mlen = 1;
inst->size_written = 4 * REG_SIZE;
- bld.MOV(retype(dest, ret_payload.type), component(ret_payload, 0));
+ /* SKL PRM, vol07, 3D Media GPGPU Engine, Bounds Checking and Faulting:
+ *
+ * "Out-of-bounds checking is always performed at a DWord granularity. If
+ * any part of the DWord is out-of-bounds then the whole DWord is
+ * considered out-of-bounds."
+ *
+ * This implies that types with size smaller than 4-bytes need to be
+ * padded if they don't complete the last dword of the buffer. But as we
+ * need to maintain the original size we need to reverse the padding
+ * calculation to return the correct size to know the number of elements
+ * of an unsized array. As we stored in the last two bits of the surface
+ * size the needed padding for the buffer, we calculate here the
+ * original buffer_size reversing the surface_size calculation:
+ *
+ * surface_size = isl_align(buffer_size, 4) +
+ * (isl_align(buffer_size) - buffer_size)
+ *
+ * buffer_size = surface_size & ~3 - surface_size & 3
+ */
+
+ fs_reg size_aligned4 = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ fs_reg size_padding = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ fs_reg buffer_size = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+
+ ubld.AND(size_padding, ret_payload, brw_imm_ud(3));
+ ubld.AND(size_aligned4, ret_payload, brw_imm_ud(~3));
+ ubld.ADD(buffer_size, size_aligned4, negate(size_padding));
+
+ bld.MOV(retype(dest, ret_payload.type), component(buffer_size, 0));
+
brw_mark_surface_used(prog_data, index);
break;
}
diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c
index bfb27fa4a44..c205b3d2c0b 100644
--- a/src/intel/isl/isl_surface_state.c
+++ b/src/intel/isl/isl_surface_state.c
@@ -673,7 +673,27 @@ void
isl_genX(buffer_fill_state_s)(void *state,
const struct isl_buffer_fill_state_info *restrict info)
{
- uint32_t num_elements = info->size / info->stride;
+ uint64_t buffer_size = info->size;
+
+ /* Uniform and Storage buffers need to have surface size not less that the
+ * aligned 32-bit size of the buffer. To calculate the array lenght on
+ * unsized arrays in StorageBuffer the last 2 bits store the padding size
+ * added to the surface, so we can calculate latter the original buffer
+ * size to know the number of elements.
+ *
+ * surface_size = isl_align(buffer_size, 4) +
+ * (isl_align(buffer_size) - buffer_size)
+ *
+ * buffer_size = (surface_size & ~3) - (surface_size & 3)
+ */
+ if (info->format == ISL_FORMAT_RAW ||
+ info->stride < isl_format_get_layout(info->format)->bpb / 8) {
+ assert(info->stride == 1);
+ uint64_t aligned_size = isl_align(buffer_size, 4);
+ buffer_size = aligned_size + (aligned_size - buffer_size);
+ }
+
+ uint32_t num_elements = buffer_size / info->stride;
if (GEN_GEN >= 7) {
/* From the IVB PRM, SURFACE_STATE::Height,
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 3d44bfd43fc..41f9e00c4e7 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -2155,6 +2155,17 @@ void anv_GetBufferMemoryRequirements(
pMemoryRequirements->size = buffer->size;
pMemoryRequirements->alignment = alignment;
+
+ /* Storage and Uniform buffers should have their size aligned to
+ * 32-bits to avoid boundary checks when last DWord is not complete.
+ * This would ensure that not internal padding would be needed for
+ * 16-bit types.
+ */
+ if (device->robust_buffer_access &&
+ (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
+ buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
+ pMemoryRequirements->size = align_u64(buffer->size, 4);
+
pMemoryRequirements->memoryTypeBits = memory_types;
}