aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIago Toral Quiroga <[email protected]>2016-06-22 11:44:15 +0200
committerSamuel Iglesias Gonsálvez <[email protected]>2017-01-03 11:26:51 +0100
commitd8e123cc5d66022069f3aee53318bfd1075bcc53 (patch)
tree5f74bcaf0a2b8559b9488cd488157cd7b458ca53
parent017c8df35b343de0bb23220d030089bf57fb28d4 (diff)
i965/vec4: Add a shuffle_64bit_data helper
SIMD4x2 64bit data is stored in register space like this: r0.0:DF x0 y0 z0 w0 r1.0:DF x1 y1 z1 w1 When we need to write data such as this to memory using 32-bit write messages we need to shuffle it in this fashion: r0.0:DF x0 y0 x1 y1 r0.1:DF z0 w0 z1 w1 and emit two 32-bit write messages, one for r0.0 at base_offset and another one for r0.1 at base_offset+16. We also need to do the inverse operation when we read using 32-bit messages to produce valid SIMD4x2 64bit data from the data read. We can achieve this by aplying the exact same shuffling to the data read, although we need to apply different channel enables since the layout of the data is reversed. This helper implements the data shuffling logic and we will use it in various places where we read and write 64bit data from/to memory. v2 (Curro): - Use the writemask helper and don't assert on the original writemask being XYZW. - Use the Vec4 IR builder to simplify the implementation. v3 (Iago): - Use byte_offset() instead of offset(). v3: - Fix typo (Matt) - Clarify the example and fix indention (Matt). Reviewed-by: Matt Turner <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_nir.cpp71
2 files changed, 76 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index c4a0004dbed..58b03265c34 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -328,6 +328,11 @@ public:
src_reg setup_imm_df(double v);
+ vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
+ bool for_write,
+ bblock_t *block = NULL,
+ vec4_instruction *ref = NULL);
+
virtual void emit_nir_code();
virtual void nir_setup_uniforms();
virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index fe82ed8f15b..14d0546c5c0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -2224,4 +2224,75 @@ vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
dst_reg(VGRF, alloc.allocate(DIV_ROUND_UP(instr->def.bit_size, 32)));
}
+/* SIMD4x2 64bit data is stored in register space like this:
+ *
+ * r0.0:DF x0 y0 z0 w0
+ * r1.0:DF x1 y1 z1 w1
+ *
+ * When we need to write data such as this to memory using 32-bit write
+ * messages we need to shuffle it in this fashion:
+ *
+ * r0.0:DF x0 y0 x1 y1 (to be written at base offset)
+ * r0.0:DF z0 w0 z1 w1 (to be written at base offset + 16)
+ *
+ * We need to do the inverse operation when we read using 32-bit messages,
+ * which we can do by applying the same exact shuffling on the 64-bit data
+ * read, only that because the data for each vertex is positioned differently
+ * we need to apply different channel enables.
+ *
+ * This function takes 64bit data and shuffles it as explained above.
+ *
+ * The @for_write parameter is used to specify if the shuffling is being done
+ * for proper SIMD4x2 64-bit data that needs to be shuffled prior to a 32-bit
+ * write message (for_write = true), or instead we are doing the inverse
+ * operation and we have just read 64-bit data using a 32-bit messages that we
+ * need to shuffle to create valid SIMD4x2 64-bit data (for_write = false).
+ *
+ * If @block and @ref are non-NULL, then the shuffling is done after @ref,
+ * otherwise the instructions are emitted normally at the end. The function
+ * returns the last instruction inserted.
+ *
+ * Notice that @src and @dst cannot be the same register.
+ */
+vec4_instruction *
+vec4_visitor::shuffle_64bit_data(dst_reg dst, src_reg src, bool for_write,
+ bblock_t *block, vec4_instruction *ref)
+{
+ assert(type_sz(src.type) == 8);
+ assert(type_sz(dst.type) == 8);
+ assert(!regions_overlap(dst, 2 * REG_SIZE, src, 2 * REG_SIZE));
+ assert(!ref == !block);
+
+ const vec4_builder bld = !ref ? vec4_builder(this).at_end() :
+ vec4_builder(this).at(block, ref->next);
+
+ /* Resolve swizzle in src */
+ vec4_instruction *inst;
+ if (src.swizzle != BRW_SWIZZLE_XYZW) {
+ dst_reg data = dst_reg(this, glsl_type::dvec4_type);
+ inst = bld.MOV(data, src);
+ src = src_reg(data);
+ }
+
+ /* dst+0.XY = src+0.XY */
+ inst = bld.group(4, 0).MOV(writemask(dst, WRITEMASK_XY), src);
+
+ /* dst+0.ZW = src+1.XY */
+ inst = bld.group(4, for_write ? 1 : 0)
+ .MOV(writemask(dst, WRITEMASK_ZW),
+ swizzle(byte_offset(src, REG_SIZE), BRW_SWIZZLE_XYXY));
+
+ /* dst+1.XY = src+0.ZW */
+ inst = bld.group(4, for_write ? 0 : 1)
+ .MOV(writemask(byte_offset(dst, REG_SIZE), WRITEMASK_XY),
+ swizzle(src, BRW_SWIZZLE_ZWZW));
+
+ /* dst+1.ZW = src+1.ZW */
+ inst = bld.group(4, 1)
+ .MOV(writemask(byte_offset(dst, REG_SIZE), WRITEMASK_ZW),
+ byte_offset(src, REG_SIZE));
+
+ return inst;
+}
+
}