diff options
author | José Fonseca <[email protected]> | 2009-08-04 12:32:24 +0100 |
---|---|---|
committer | José Fonseca <[email protected]> | 2009-08-29 09:21:23 +0100 |
commit | 28e46458bca3065baf0424b20e5b72cb672069e6 (patch) | |
tree | 0511dfc91409219ebbbc8640361dc26a1f5f5daa | |
parent | 1dd7bb17c7331f9ecd0bc830b61ada235a56fe6d (diff) |
llvmpipe: Some notes about PSHUF.
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_swizzle.c | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c index 0205d17ff17..8cda4a48ba3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c @@ -45,6 +45,9 @@ lp_build_broadcast_aos(struct lp_build_context *bld, if(a == bld->undef || a == bld->zero || a == bld->one) return a; + /* XXX: SSSE3 has PSHUFB which should be better than bitmasks, but forcing + * using shuffles here actually causes worse results. More investigation is + * needed. */ if (n <= 4) { /* * Shuffle. @@ -62,10 +65,10 @@ lp_build_broadcast_aos(struct lp_build_context *bld, /* * Bit mask and recursive shifts * - * XYZW XYZW .... XYZW - * _Y__ _Y__ .... _Y__ - * YY_ YY__ .... YY__ - * YYYY YYYY .... YYYY + * XYZW XYZW .... XYZW <= input + * 0Y00 0Y00 .... 0Y00 + * YY00 YY00 .... YY00 + * YYYY YYYY .... YYYY <= output */ union lp_type type4 = type; const char shifts[4][2] = { |