diff options
author | Jose Maria Casanova Crespo <[email protected]> | 2018-02-01 00:26:04 +0100 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2018-02-28 21:37:40 -0800 |
commit | 69be3a82ca6f3247c75d76ae97429462c8909a3c (patch) | |
tree | 99847172d151b66d303cfb55f9db276ed0e634b3 /src/intel/compiler | |
parent | 8dd8be0323bbc207631a39f43cff7b898af4a55a (diff) |
i965/fs: Support 16-bit store_ssbo with VK_KHR_relaxed_block_layout
Restrict the use of untyped_surface_write with 16-bit pairs in
ssbo to the cases where we can guarantee that offset is multiple
of 4.
Taking into account that VK_KHR_relaxed_block_layout is available
in ANV we can only guarantee that when we have a constant offset
that is multiple of 4. For non constant offsets we will always use
byte_scattered_write.
v2: (Jason Ekstrand)
- Assert offset_reg to be multiple of 4 if it is immediate.
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/intel/compiler')
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 22 |
1 files changed, 15 insertions, 7 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 3f077b3c919..73f424cf10a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4130,6 +4130,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unsigned num_components = ffs(~(writemask >> first_component)) - 1; fs_reg write_src = offset(val_reg, bld, first_component); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); + if (type_size > 4) { /* We can't write more than 2 64-bit components at once. Limit * the num_components of the write to what we can do and let the next @@ -4145,14 +4147,19 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr * 32-bit-aligned we need to use byte-scattered writes because * untyped writes works with 32-bit components with 32-bit * alignment. byte_scattered_write messages only support one - * 16-bit component at a time. + * 16-bit component at a time. As VK_KHR_relaxed_block_layout + * could be enabled we can not guarantee that not constant offsets + * to be 32-bit aligned for 16-bit types. For example an array, of + * 16-bit vec3 with array element stride of 6. * - * For example, if there is a 3-components vector we submit one - * untyped-write message of 32-bit (first two components), and one - * byte-scattered write message (the last component). + * In the case of 32-bit aligned constant offsets if there is + * a 3-components vector we submit one untyped-write message + * of 32-bit (first two components), and one byte-scattered + * write message (the last component). */ - if (first_component % 2) { + if ( !const_offset || ((const_offset->u32[0] + + type_size * first_component) % 4)) { /* If we use a .yz writemask we also need to emit 2 * byte-scattered write messages because of y-component not * being aligned to 32-bit. @@ -4178,7 +4185,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } fs_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); + if (const_offset) { offset_reg = brw_imm_ud(const_offset->u32[0] + type_size * first_component); @@ -4217,7 +4224,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } else { assert(num_components * type_size <= 16); assert((num_components * type_size) % 4 == 0); - assert((first_component * type_size) % 4 == 0); + assert(offset_reg.file != BRW_IMMEDIATE_VALUE || + offset_reg.ud % 4 == 0); unsigned num_slots = (num_components * type_size) / 4; emit_untyped_write(bld, surf_index, offset_reg, |