diff options
author | Iago Toral Quiroga <[email protected]> | 2016-02-12 14:05:11 +0100 |
---|---|---|
committer | Samuel Iglesias Gonsálvez <[email protected]> | 2017-01-03 11:26:51 +0100 |
commit | 8eea41e75d86bfe9bef5f69b25ad797da236a008 (patch) | |
tree | 060da870f09d5d48ff7061b38f8686d5d8fdc638 | |
parent | 9998d55afd179ad5019d3841e4c3255a02fd2d7b (diff) |
i965/vec4: Fix SSBO stores for 64-bit data
In this case we need to shuffle the 64-bit data before we write it
to memory, source from reg_offset + 1 to write components Z and W
and consider that each DF channel is twice as big.
v2: use byte_offset() instead of offset().
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 40 |
1 files changed, 32 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 589bc907ef5..91e1d36714f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -503,7 +503,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } /* Value */ - src_reg val_reg = get_nir_src(instr->src[0], 4); + src_reg val_reg = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, 4); /* Writemask */ unsigned write_mask = instr->const_index[0]; @@ -549,24 +549,47 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) const vec4_builder bld = vec4_builder(this).at_end() .annotate(current_annotation, base_ir); - int swizzle[4] = { 0, 0, 0, 0}; + unsigned type_slots = nir_src_bit_size(instr->src[0]) / 32; + if (type_slots == 2) { + dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); + shuffle_64bit_data(tmp, retype(val_reg, tmp.type), true); + val_reg = src_reg(retype(tmp, BRW_REGISTER_TYPE_F)); + } + + uint8_t swizzle[4] = { 0, 0, 0, 0}; int num_channels = 0; unsigned skipped_channels = 0; int num_components = instr->num_components; for (int i = 0; i < num_components; i++) { + /* Read components Z/W of a dvec from the appropriate place. We will + * also have to adjust the swizzle (we do that with the '% 4' below) + */ + if (i == 2 && type_slots == 2) + val_reg = byte_offset(val_reg, REG_SIZE); + /* Check if this channel needs to be written. If so, record the * channel we need to take the data from in the swizzle array */ int component_mask = 1 << i; int write_test = write_mask & component_mask; - if (write_test) - swizzle[num_channels++] = i; + if (write_test) { + /* If we are writing doubles we have to write 2 channels worth of + * of data (64 bits) for each double component. + */ + swizzle[num_channels++] = (i * type_slots) % 4; + if (type_slots == 2) + swizzle[num_channels++] = (i * type_slots + 1) % 4; + } /* If we don't have to write this channel it means we have a gap in the * vector, so write the channels we accumulated until now, if any. Do - * the same if this was the last component in the vector. + * the same if this was the last component in the vector, if we have + * enough channels for a full vec4 write or if we have processed + * components XY of a dvec (since components ZW are not in the same + * SIMD register) */ - if (!write_test || i == num_components - 1) { + if (!write_test || i == num_components - 1 || num_channels == 4 || + (i == 1 && type_slots == 2)) { if (num_channels > 0) { /* We have channels to write, so update the offset we need to * write at to skip the channels we skipped, if any. @@ -600,8 +623,9 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) num_channels = 0; } - /* We did not write the current channel, so increase skipped count */ - skipped_channels++; + /* If we didn't write the channel, increase skipped count */ + if (!write_test) + skipped_channels += type_slots; } } |