aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
authorJames Benton <[email protected]>2012-09-13 16:04:42 +0100
committerJosé Fonseca <[email protected]>2012-11-28 19:14:36 +0000
commitfa1b481c09b14e01eca1b3db8e0854033f6dee3d (patch)
tree170810687d31e60041309682e8f923f409174077 /src/gallium/auxiliary
parent1d3789bccbbcc814fd7b339e9f5b5631e30d9f0e (diff)
llvmpipe: Unswizzled rendering.
Reviewed-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c75
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.h10
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c9
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.h3
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.c36
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.h9
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_quad.c50
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_quad.h9
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c22
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.c137
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.h25
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c4
12 files changed, 360 insertions, 29 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index af942ada2c2..cc442369630 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -415,6 +415,81 @@ lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm,
/**
+ * Pick a suitable num_dsts for lp_build_conv to ensure optimal cases are used.
+ *
+ * Returns the number of dsts created from src
+ */
+int lp_build_conv_auto(struct gallivm_state *gallivm,
+                       struct lp_type src_type,
+                       struct lp_type* dst_type,
+                       const LLVMValueRef *src,
+                       unsigned num_srcs,
+                       LLVMValueRef *dst)
+{
+   /*
+    * src_type  - type of every vector in src
+    * dst_type  - requested destination type (in/out): its length is rewritten
+    *             to 16 when one of the packed float -> unorm8 cases is taken
+    * src       - num_srcs input vectors
+    * dst       - output array, may alias src
+    *
+    * Returns the number of vectors stored in dst.
+    */
+   unsigned i;
+   unsigned pack = 0;   /* how many srcs fold into one dst, 0 = no packing */
+   int num_dsts = num_srcs;
+
+   /*
+    * Identical types need no conversion.  The values are still forwarded so
+    * that a caller passing a separate dst array gets valid entries; when dst
+    * aliases src there is nothing to do.
+    */
+   if (src_type.floating == dst_type->floating &&
+       src_type.width    == dst_type->width &&
+       src_type.length   == dst_type->length &&
+       src_type.fixed    == dst_type->fixed &&
+       src_type.norm     == dst_type->norm &&
+       src_type.sign     == dst_type->sign) {
+      if (dst != src) {
+         for (i = 0; i < num_srcs; ++i) {
+            dst[i] = src[i];
+         }
+      }
+      return num_dsts;
+   }
+
+   /* Special case 4x4f -> 1x16ub or 2x8f -> 1x16ub
+    */
+   if (src_type.floating == 1 &&
+       src_type.fixed    == 0 &&
+       src_type.sign     == 1 &&
+       src_type.norm     == 0 &&
+       src_type.width    == 32 &&
+
+       dst_type->floating == 0 &&
+       dst_type->fixed    == 0 &&
+       dst_type->sign     == 0 &&
+       dst_type->norm     == 1 &&
+       dst_type->width    == 8)
+   {
+      if (src_type.length == 4 && util_cpu_caps.has_sse2) {
+         /* 4x4f --> 1x16ub */
+         pack = 4;
+      }
+      else if (src_type.length == 8 && util_cpu_caps.has_avx) {
+         /* 2x8f --> 1x16ub */
+         pack = 2;
+      }
+
+      if (pack) {
+         assert((num_srcs % pack) == 0);
+
+         num_dsts = num_srcs / pack;
+         dst_type->length = 16;
+
+         lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts);
+         return num_dsts;
+      }
+   }
+
+   /* lp_build_resize does not support M:N */
+   if (src_type.width == dst_type->width) {
+      lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts);
+   } else {
+      /* Widths differ: convert one vector at a time (1:1). */
+      for (i = 0; i < num_srcs; ++i) {
+         lp_build_conv(gallivm, src_type, *dst_type, &src[i], 1, &dst[i], 1);
+      }
+   }
+
+   return num_dsts;
+}
+
+
+/**
* Generic type conversion.
*
* TODO: Take a precision argument, or even better, add a new precision member
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
index ef221051bcd..42a11137473 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
@@ -70,6 +70,16 @@ lp_build_conv(struct gallivm_state *gallivm,
const LLVMValueRef *srcs, unsigned num_srcs,
LLVMValueRef *dsts, unsigned num_dsts);
+
+int /* number of vectors produced in dst */
+lp_build_conv_auto(struct gallivm_state *gallivm,
+ struct lp_type src_type, /* type of each vector in src */
+ struct lp_type* dst_type, /* in/out: length is set to 16 for the packed float -> unorm8 cases */
+ const LLVMValueRef *src,
+ unsigned num_srcs, /* number of vectors in src */
+ LLVMValueRef *dst);
+
+
void
lp_build_conv_mask(struct gallivm_state *gallivm,
struct lp_type src_type,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 8a77a43dae8..f56b61bf248 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -560,7 +560,8 @@ LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
unsigned mask,
LLVMValueRef a,
- LLVMValueRef b)
+ LLVMValueRef b,
+ unsigned num_channels)
{
LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
@@ -594,8 +595,8 @@ lp_build_select_aos(struct lp_build_context *bld,
LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
+ for(j = 0; j < n; j += num_channels)
+ for(i = 0; i < num_channels; ++i)
shuffles[j + i] = LLVMConstInt(elem_type,
(mask & (1 << i) ? 0 : n) + j + i,
0);
@@ -603,7 +604,7 @@ lp_build_select_aos(struct lp_build_context *bld,
return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
}
else {
- LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, 4);
+ LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
return lp_build_select(bld, mask_vec, a, b);
}
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index 64c0a1f5946..f5304240a59 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -79,7 +79,8 @@ LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
unsigned mask,
LLVMValueRef a,
- LLVMValueRef b);
+ LLVMValueRef b,
+ unsigned num_channels);
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index e57d4148870..b467d561e36 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -211,6 +211,42 @@ lp_build_concat(struct gallivm_state *gallivm,
return tmp[0];
}
+
+/**
+ * Combines vectors to reduce from num_srcs to num_dsts.
+ * Returns the number of src vectors concatenated in a single dst.
+ *
+ * num_dsts must be non-zero and num_srcs must be exactly divisible by
+ * num_dsts.  When num_srcs == num_dsts the sources are copied to dst
+ * unchanged and 1 is returned.
+ *
+ * e.g. For num_srcs = 4 and src = [x, y, z, w]
+ *          num_dsts = 1  dst = [xyzw]    return = 4
+ *          num_dsts = 2  dst = [xy, zw]  return = 2
+ *          num_dsts = 4  dst = [x, y, z, w]  return = 1
+ */
+int
+lp_build_concat_n(struct gallivm_state *gallivm,
+                  struct lp_type src_type,
+                  LLVMValueRef *src,
+                  unsigned num_srcs,
+                  LLVMValueRef *dst,
+                  unsigned num_dsts)
+{
+   unsigned i;
+   int size;
+
+   /* Validate before dividing: num_dsts == 0 would divide by zero. */
+   assert(num_dsts > 0);
+   assert(num_srcs >= num_dsts);
+   assert((num_srcs % num_dsts) == 0);
+
+   if (num_dsts == 0)
+      return 0;
+
+   size = num_srcs / num_dsts;
+
+   if (num_srcs == num_dsts) {
+      /* Nothing to concatenate, but dst must still be populated. */
+      for (i = 0; i < num_dsts; ++i) {
+         dst[i] = src[i];
+      }
+      return 1;
+   }
+
+   for (i = 0; i < num_dsts; ++i) {
+      dst[i] = lp_build_concat(gallivm, &src[i * size], src_type, size);
+   }
+
+   return size;
+}
+
+
/**
* Interleave vector elements.
*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index f734c60b1d8..7cede35bbde 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -87,6 +87,15 @@ lp_build_concat(struct gallivm_state *gallivm,
struct lp_type src_type,
unsigned num_vectors);
+int /* number of src vectors concatenated into each dst */
+lp_build_concat_n(struct gallivm_state *gallivm,
+ struct lp_type src_type,
+ LLVMValueRef *src,
+ unsigned num_srcs, /* must be exactly divisible by num_dsts */
+ LLVMValueRef *dst,
+ unsigned num_dsts);
+
+
LLVMValueRef
lp_build_packs2(struct gallivm_state *gallivm,
struct lp_type src_type,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index c7c58edd5a7..8a0efed655f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -31,6 +31,7 @@
#include "lp_bld_const.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_quad.h"
+#include "lp_bld_pack.h"
static const unsigned char
@@ -156,3 +157,52 @@ lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
}
+
+/**
+ * Twiddle from quad format to row format.
+ *
+ * Vectors are processed in consecutive pairs.  Each vector of a pair is
+ * reinterpreted as two wide integer elements and the pair is interleaved,
+ * giving one output vector per interleave half:
+ *
+ *   src0      src1
+ * ######### #########     #################
+ * # 0 | 1 # # 4 | 5 #     # 0 | 1 | 4 | 5 # dst0
+ * #---+---# #---+---#  -> #################
+ * # 2 | 3 # # 6 | 7 #     # 2 | 3 | 6 | 7 # dst1
+ * ######### #########     #################
+ *
+ * src_count must be even.  Both inputs of a pair are read before either
+ * output is stored.
+ */
+void
+lp_bld_quad_twiddle(struct gallivm_state *gallivm,
+                    struct lp_type lp_dst_type,
+                    const LLVMValueRef* src,
+                    unsigned src_count,
+                    LLVMValueRef* dst)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_type half_type = lp_dst_type;
+   LLVMTypeRef half_vec_type;
+   LLVMTypeRef out_vec_type;
+   unsigned pair;
+
+   assert((src_count % 2) == 0);
+
+   /* Same total size as lp_dst_type, but split into 2 integer elements */
+   half_type.width = (lp_dst_type.width * lp_dst_type.length) / 2;
+   half_type.length = 2;
+   half_type.floating = 0;
+
+   half_vec_type = lp_build_vec_type(gallivm, half_type);
+   out_vec_type = lp_build_vec_type(gallivm, lp_dst_type);
+
+   for (pair = 0; pair < src_count; pair += 2) {
+      LLVMValueRef first = LLVMBuildBitCast(builder, src[pair], half_vec_type, "");
+      LLVMValueRef second = LLVMBuildBitCast(builder, src[pair + 1], half_vec_type, "");
+      LLVMValueRef lo = lp_build_interleave2(gallivm, half_type, first, second, 0);
+      LLVMValueRef hi = lp_build_interleave2(gallivm, half_type, first, second, 1);
+
+      /* Back to the caller's vector type */
+      dst[pair] = LLVMBuildBitCast(builder, lo, out_vec_type, "");
+      dst[pair + 1] = LLVMBuildBitCast(builder, hi, out_vec_type, "");
+   }
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.h b/src/gallium/auxiliary/gallivm/lp_bld_quad.h
index be6a1efc396..e41f80efe2d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.h
@@ -88,5 +88,14 @@ LLVMValueRef
lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
LLVMValueRef a);
+/*
+ * Twiddle from quad format to row format
+ */
+void
+lp_bld_quad_twiddle(struct gallivm_state *gallivm,
+ struct lp_type lp_dst_type, /* vector type of the dst values */
+ const LLVMValueRef* src,
+ unsigned src_count, /* number of vectors in src; must be even */
+ LLVMValueRef* dst); /* receives src_count vectors */
#endif /* LP_BLD_QUAD_H_ */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 37490e47c85..8ea5f5e01dc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -772,7 +772,7 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
offset1 = LLVMBuildLoad(builder, offset1, "");
offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
}
- offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0);
+ offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4);
}
else {
unsigned i;
@@ -849,7 +849,7 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
stride1 = LLVMBuildLoad(builder, stride1, "");
stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
}
- stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0);
+ stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
}
else {
LLVMValueRef stride1;
@@ -1045,11 +1045,11 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
*out_width = size;
}
else if (bld->num_lods == num_quads) {
- *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0);
+ *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
if (dims >= 2) {
- *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1);
+ *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
if (dims == 3) {
- *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2);
+ *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
}
}
}
@@ -1246,9 +1246,9 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
signrxyz = LLVMBuildBitCast(builder, rxyz, lp_build_vec_type(gallivm, intctype), "");
signrxyz = LLVMBuildAnd(builder, signrxyz, signmask, "");
- arxs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 0);
- arys = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 1);
- arzs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 2);
+ arxs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 0, 4);
+ arys = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 1, 4);
+ arzs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 2, 4);
/*
* select x if x >= y else select y
@@ -1267,15 +1267,15 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
* snewz = signrz * rx;
* tnewz = -ry;
*/
- signrxs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 0);
+ signrxs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 0, 4);
snewx = LLVMBuildXor(builder, signrxs, rzneg, "");
tnewx = ryneg;
- signrys = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 1);
+ signrys = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 1, 4);
snewy = rx;
tnewy = LLVMBuildXor(builder, signrys, rz, "");
- signrzs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 2);
+ signrzs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 2, 4);
snewz = LLVMBuildXor(builder, signrzs, rx, "");
tnewz = ryneg;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
index 4ae4f3752a8..377884a78cf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -159,21 +159,24 @@ lp_build_extract_broadcast(struct gallivm_state *gallivm,
/**
- * Swizzle one channel into all other three channels.
+ * Swizzle one channel into other channels.
*/
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
LLVMValueRef a,
- unsigned channel)
+ unsigned channel,
+ unsigned num_channels)
{
LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
- if(a == bld->undef || a == bld->zero || a == bld->one)
+ if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
return a;
+ assert(num_channels == 2 || num_channels == 4);
+
/* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
* using shuffles here actually causes worst results. More investigation is
* needed. */
@@ -184,12 +187,55 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
+ for(j = 0; j < n; j += num_channels)
+ for(i = 0; i < num_channels; ++i)
shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
}
+ else if (num_channels == 2) {
+ /*
+ * Bit mask and shifts
+ *
+ * XY XY .... XY <= input
+ * 0Y 0Y .... 0Y
+ * YY YY .... YY
+ * YY YY .... YY <= output
+ */
+ struct lp_type type2;
+ LLVMValueRef tmp = NULL;
+ int shift;
+
+ a = LLVMBuildAnd(builder, a,
+ lp_build_const_mask_aos(bld->gallivm,
+ type, 1 << channel, num_channels), "");
+
+ type2 = type;
+ type2.floating = FALSE;
+ type2.width *= 2;
+ type2.length /= 2;
+
+ a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ shift = channel == 0 ? 1 : -1;
+#else
+ shift = channel == 0 ? -1 : 1;
+#endif
+
+ if (shift > 0) {
+ tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
+ } else if (shift < 0) {
+ tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
+ }
+
+ assert(tmp);
+ if (tmp) {
+ a = LLVMBuildOr(builder, a, tmp, "");
+ }
+
+ return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
+ }
else {
/*
* Bit mask and recursive shifts
@@ -247,6 +293,45 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
}
+/**
+ * Swizzle a vector consisting of an array of XYZW structs.
+ *
+ * This fills a vector of dst_len length with the swizzled channels from src.
+ * Element i of the result is taken from src element
+ * swizzles[i % num_swizzles]; the swizzle values are used directly as
+ * element indices into src, no per-struct offset is added.
+ *
+ * e.g. with swizzles = { 2, 1, 0 }, num_swizzles = 3 and dst_len = 8 the
+ * result holds src elements 2 1 0 2 1 0 2 1:
+ *      RGBA RGBA -> BGR BGR BG
+ *
+ * An entry equal to LP_BLD_SWIZZLE_DONTCARE gives an undefined shuffle index.
+ *
+ * @param swizzles      the swizzle array
+ * @param num_swizzles  the number of elements in swizzles, must be non-zero
+ * @param dst_len       the length of the result
+ */
+LLVMValueRef
+lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
+                       LLVMValueRef src,
+                       const unsigned char* swizzles,
+                       unsigned num_swizzles,
+                       unsigned dst_len)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef undef_index = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
+   unsigned i;
+
+   assert(dst_len < LP_MAX_VECTOR_WIDTH);
+   /* i % num_swizzles below would divide by zero on an empty swizzle list */
+   assert(num_swizzles > 0);
+
+   for (i = 0; i < dst_len; ++i) {
+      int swizzle = swizzles[i % num_swizzles];
+
+      if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
+         shuffles[i] = undef_index;
+      } else {
+         shuffles[i] = lp_build_const_int32(gallivm, swizzle);
+      }
+   }
+
+   return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
+}
+
+
LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context *bld,
LLVMValueRef a,
@@ -272,7 +357,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
case PIPE_SWIZZLE_GREEN:
case PIPE_SWIZZLE_BLUE:
case PIPE_SWIZZLE_ALPHA:
- return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]);
+ return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
case PIPE_SWIZZLE_ZERO:
return bld->zero;
case PIPE_SWIZZLE_ONE:
@@ -367,7 +452,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
cond |= 1 << chan;
}
}
- res = lp_build_select_aos(bld, cond, bld->one, bld->zero);
+ res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
/*
* Build a type where each element is an integer that cover the four
@@ -554,6 +639,44 @@ lp_build_transpose_aos(struct gallivm_state *gallivm,
/**
+ * Transpose from AOS <-> SOA for num_srcs
+ */
+void
+lp_build_transpose_aos_n(struct gallivm_state *gallivm,
+                         struct lp_type type,
+                         const LLVMValueRef* src,
+                         unsigned num_srcs,
+                         LLVMValueRef* dst)
+{
+   /*
+    * Handles 1, 2 or 4 source vectors; any other count trips the assert
+    * and leaves dst untouched.
+    */
+   if (num_srcs == 1) {
+      /* A single vector is passed through unchanged */
+      dst[0] = src[0];
+   }
+   else if (num_srcs == 2) {
+      /* Build both halves before storing, src and dst may be the same array */
+      LLVMValueRef lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
+      LLVMValueRef hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
+
+      dst[0] = lo;
+      dst[1] = hi;
+   }
+   else if (num_srcs == 4) {
+      lp_build_transpose_aos(gallivm, type, src, dst);
+   }
+   else {
+      assert(0);
+   }
+}
+
+
+/**
* Pack n-th element of aos values,
* pad out to destination size.
* i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
index c49d9167231..91ecd341476 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
@@ -67,13 +67,14 @@ lp_build_extract_broadcast(struct gallivm_state *gallivm,
/**
- * Broadcast one channel of a vector composed of arrays of XYZW structures into
- * all four channel.
+ * Broadcast one channel of a vector composed of arrays of XYZ.. structures into
+ * all channels XXX...
*/
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- unsigned channel);
+ LLVMValueRef a,
+ unsigned channel,
+ unsigned num_channels);
/**
@@ -88,6 +89,14 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
LLVMValueRef
+lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
+ LLVMValueRef src,
+ const unsigned char* swizzles,
+ unsigned num_swizzles,
+ unsigned dst_len);
+
+
+LLVMValueRef
lp_build_swizzle_soa_channel(struct lp_build_context *bld,
const LLVMValueRef *unswizzled,
unsigned swizzle);
@@ -113,6 +122,14 @@ lp_build_transpose_aos(struct gallivm_state *gallivm,
LLVMValueRef dst[4]);
+void
+lp_build_transpose_aos_n(struct gallivm_state *gallivm,
+ struct lp_type type,
+ const LLVMValueRef* src,
+ unsigned num_srcs, /* 1, 2 or 4; any other value asserts */
+ LLVMValueRef* dst); /* receives num_srcs vectors; may be the same array as src */
+
+
LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
struct lp_type src_type,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 44f684a1d01..dbd9ccb3b62 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -94,7 +94,7 @@ swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
unsigned chan)
{
chan = bld->swizzles[chan];
- return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
+ return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
}
@@ -623,7 +623,7 @@ lp_emit_instruction_aos(
case TGSI_OPCODE_EX2:
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
- tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
+ tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
break;