summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Ben Crocker <[email protected]> 2017-08-23 16:32:43 -0400
committer: Andres Gomez <[email protected]> 2017-09-06 18:05:10 +0300
commit: e529fd4e116da284aa3340dc5427fa602a588163 (patch)
tree: 6d6f5af2af9e7693d5908734a28dcf105ad82121 /src/gallium
parent: a1bdc43dd203396647a03a50e64d67707d8322b5 (diff)
llvmpipe: lp_build_gather_elem_vec BE fix for 3x16 load
Fix loading of a 3x16 vector as a single 48-bit load on big-endian systems (PPC64, S390).

Roland Scheidegger's commit e827d9175675aaa6cfc0b981e2a80685fb7b3a74 plus Ray Strode's patch reduce pre-Roland Piglit failures from ~4000 to ~2000.

This patch fixes three of the four regressions observed by Ray:

- draw-vertices
- draw-vertices-half-float
- draw-vertices-half-float_gles2

One regression remains:

- draw-vertices-2101010

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100613
Cc: "17.2" "17.1" <[email protected]>
Signed-off-by: Ben Crocker <[email protected]>
Reviewed-by: Roland Scheidegger <[email protected]>
(cherry picked from commit 57c8ead0cd08e6aaf88a389f7ce528c4f0face65)
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_gather.c 30
1 files changed, 28 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
index ccd03765c73..7d11dcd3b64 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
@@ -234,13 +234,39 @@ lp_build_gather_elem_vec(struct gallivm_state *gallivm,
*/
res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
- if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
+ if (vector_justify) {
res = LLVMBuildShl(gallivm->builder, res,
LLVMConstInt(dst_elem_type,
dst_type.width - src_width, 0), "");
-#endif
}
+ if (src_width == 48) {
+ /* Load 3x16 bit vector.
+ * The sequence of loads on big-endian hardware proceeds as follows.
+ * 16-bit fields are denoted by X, Y, Z, and 0. In memory, the sequence
+ * of three fields appears in the order X, Y, Z.
+ *
+ * Load 32-bit word: 0.0.X.Y
+ * Load 16-bit halfword: 0.0.0.Z
+ * Rotate left: 0.X.Y.0
+ * Bitwise OR: 0.X.Y.Z
+ *
+ * The order in which we need the fields in the result is 0.Z.Y.X,
+ * the same as on little-endian; permute 16-bit fields accordingly
+ * within 64-bit register:
+ */
+ LLVMValueRef shuffles[4] = {
+ lp_build_const_int32(gallivm, 2),
+ lp_build_const_int32(gallivm, 1),
+ lp_build_const_int32(gallivm, 0),
+ lp_build_const_int32(gallivm, 3),
+ };
+ res = LLVMBuildBitCast(gallivm->builder, res,
+ lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), "");
+ res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), "");
+ res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
+ }
+#endif
}
}
return res;