summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJosé Fonseca <[email protected]>2009-08-04 12:32:24 +0100
committerJosé Fonseca <[email protected]>2009-08-29 09:21:23 +0100
commit28e46458bca3065baf0424b20e5b72cb672069e6 (patch)
tree0511dfc91409219ebbbc8640361dc26a1f5f5daa /src
parent1dd7bb17c7331f9ecd0bc830b61ada235a56fe6d (diff)
llvmpipe: Some notes about PSHUF.
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_swizzle.c11
1 files changed, 7 insertions, 4 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
index 0205d17ff17..8cda4a48ba3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
@@ -45,6 +45,9 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
if(a == bld->undef || a == bld->zero || a == bld->one)
return a;
+ /* XXX: SSSE3 has PSHUFB which should be better than bitmasks, but forcing
+ * the use of shuffles here actually causes worse results. More investigation
+ * is needed. */
if (n <= 4) {
/*
* Shuffle.
@@ -62,10 +65,10 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
/*
* Bit mask and recursive shifts
*
- * XYZW XYZW .... XYZW
- * _Y__ _Y__ .... _Y__
- * YY_ YY__ .... YY__
- * YYYY YYYY .... YYYY
+ * XYZW XYZW .... XYZW <= input
+ * 0Y00 0Y00 .... 0Y00
+ * YY00 YY00 .... YY00
+ * YYYY YYYY .... YYYY <= output
*/
union lp_type type4 = type;
const char shifts[4][2] = {