diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 31 | ||||
-rw-r--r-- | src/intel/isl/isl_surface_state.c | 22 | ||||
-rw-r--r-- | src/intel/vulkan/anv_device.c | 11 |
3 files changed, 62 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 8efec34cc9d..d8300589a5e 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4290,7 +4290,36 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr inst->mlen = 1; inst->size_written = 4 * REG_SIZE; - bld.MOV(retype(dest, ret_payload.type), component(ret_payload, 0)); + /* SKL PRM, vol07, 3D Media GPGPU Engine, Bounds Checking and Faulting: + * + * "Out-of-bounds checking is always performed at a DWord granularity. If + * any part of the DWord is out-of-bounds then the whole DWord is + * considered out-of-bounds." + * + * This implies that types with size smaller than 4-bytes need to be + * padded if they don't complete the last dword of the buffer. But as we + * need to maintain the original size we need to reverse the padding + * calculation to return the correct size to know the number of elements + * of an unsized array. As we stored in the last two bits of the surface + * size the needed padding for the buffer, we calculate here the + * original buffer_size reversing the surface_size calculation: + * + * surface_size = isl_align(buffer_size, 4) + + * (isl_align(buffer_size) - buffer_size) + * + * buffer_size = surface_size & ~3 - surface_size & 3 + */ + + fs_reg size_aligned4 = ubld.vgrf(BRW_REGISTER_TYPE_UD); + fs_reg size_padding = ubld.vgrf(BRW_REGISTER_TYPE_UD); + fs_reg buffer_size = ubld.vgrf(BRW_REGISTER_TYPE_UD); + + ubld.AND(size_padding, ret_payload, brw_imm_ud(3)); + ubld.AND(size_aligned4, ret_payload, brw_imm_ud(~3)); + ubld.ADD(buffer_size, size_aligned4, negate(size_padding)); + + bld.MOV(retype(dest, ret_payload.type), component(buffer_size, 0)); + brw_mark_surface_used(prog_data, index); break; } diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index bfb27fa4a44..c205b3d2c0b 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -673,7 +673,27 @@ void isl_genX(buffer_fill_state_s)(void *state, const struct isl_buffer_fill_state_info *restrict info) { - uint32_t num_elements = info->size / info->stride; + uint64_t buffer_size = info->size; + + /* Uniform and Storage buffers need to have surface size not less that the + * aligned 32-bit size of the buffer. To calculate the array lenght on + * unsized arrays in StorageBuffer the last 2 bits store the padding size + * added to the surface, so we can calculate latter the original buffer + * size to know the number of elements. + * + * surface_size = isl_align(buffer_size, 4) + + * (isl_align(buffer_size) - buffer_size) + * + * buffer_size = (surface_size & ~3) - (surface_size & 3) + */ + if (info->format == ISL_FORMAT_RAW || + info->stride < isl_format_get_layout(info->format)->bpb / 8) { + assert(info->stride == 1); + uint64_t aligned_size = isl_align(buffer_size, 4); + buffer_size = aligned_size + (aligned_size - buffer_size); + } + + uint32_t num_elements = buffer_size / info->stride; if (GEN_GEN >= 7) { /* From the IVB PRM, SURFACE_STATE::Height, diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 3d44bfd43fc..41f9e00c4e7 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -2155,6 +2155,17 @@ void anv_GetBufferMemoryRequirements( pMemoryRequirements->size = buffer->size; pMemoryRequirements->alignment = alignment; + + /* Storage and Uniform buffers should have their size aligned to + * 32-bits to avoid boundary checks when last DWord is not complete. + * This would ensure that not internal padding would be needed for + * 16-bit types. + */ + if (device->robust_buffer_access && + (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT || + buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)) + pMemoryRequirements->size = align_u64(buffer->size, 4); + pMemoryRequirements->memoryTypeBits = memory_types; } |