summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIago Toral Quiroga <[email protected]>2016-01-25 13:42:19 +0100
committerSamuel Iglesias Gonsálvez <[email protected]>2016-05-16 09:55:33 +0200
commit943f9442bf7943a992730e642e91ed874d50790c (patch)
treed05fb53ae63e966418a93866f146954229cd34da
parentb9aa66aa516c100d5476ee966f428aaf743d786c (diff)
i965/fs: support doubles with ssbo stores
Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp39
1 files changed, 35 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 143d5f32e40..78e4f8415f1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3584,6 +3584,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
/* Writemask */
unsigned writemask = instr->const_index[0];
+ /* get_nir_src() retypes to integer. Be wary of 64-bit types though
+ * since the untyped writes below operate in units of 32-bits, which
+ * means that we need to write twice as many components each time.
+ * Also, we have to suffle 64-bit data to be in the appropriate layout
+ * expected by our 32-bit write messages.
+ */
+ unsigned type_size = 4;
+ unsigned bit_size = instr->src[0].is_ssa ?
+ instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
+ if (bit_size == 64) {
+ type_size = 8;
+ fs_reg tmp =
+ fs_reg(VGRF, alloc.allocate(alloc.sizes[val_reg.nr]), val_reg.type);
+ shuffle_64bit_data_for_32bit_write(bld,
+ retype(tmp, BRW_REGISTER_TYPE_F),
+ retype(val_reg, BRW_REGISTER_TYPE_DF),
+ instr->num_components);
+ val_reg = tmp;
+ }
+
+ unsigned type_slots = type_size / 4;
+
/* Combine groups of consecutive enabled channels in one write
* message. We use ffs to find the first enabled channel and then ffs on
* the bit-inverse, down-shifted writemask to determine the length of
@@ -3593,20 +3615,29 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unsigned first_component = ffs(writemask) - 1;
unsigned length = ffs(~(writemask >> first_component)) - 1;
+ /* We can't write more than 2 64-bit components at once. Limit the
+ * length of the write to what we can do and let the next iteration
+ * handle the rest
+ */
+ if (type_size > 4)
+ length = MIN2(2, length);
+
fs_reg offset_reg;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]);
if (const_offset) {
- offset_reg = brw_imm_ud(const_offset->u32[0] + 4 * first_component);
+ offset_reg = brw_imm_ud(const_offset->u32[0] +
+ type_size * first_component);
} else {
offset_reg = vgrf(glsl_type::uint_type);
bld.ADD(offset_reg,
retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(4 * first_component));
+ brw_imm_ud(type_size * first_component));
}
+
emit_untyped_write(bld, surf_index, offset_reg,
- offset(val_reg, bld, first_component),
- 1 /* dims */, length,
+ offset(val_reg, bld, first_component * type_slots),
+ 1 /* dims */, length * type_slots,
BRW_PREDICATE_NONE);
/* Clear the bits in the writemask that we just wrote, then try