aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author lloyd <[email protected]> 2011-05-13 15:14:10 +0000
committer lloyd <[email protected]> 2011-05-13 15:14:10 +0000
commit 3ead4139654e745f3b86db5f49dde8636909e511 (patch)
tree 1f454b33bcbb8cb4a4b3311d8b38e969fe10d65c /src
parent c7259bfc91e1fd5c9db642e59d13dd5b835517cd (diff)
The new method of doing comparisons did not work all of the time: if
the low bytes were equal, then the saturating subtraction result in that byte would be 0, with the high byte containing a non-zero value. To deal with this, shift the high byte of the subtraction result down and OR it into the low byte before taking the byte-wise minimum. Add some new tests which exercise the SIMD implementation more carefully, including values that trigger the problem in the earlier version.
Diffstat (limited to 'src')
-rw-r--r-- src/block/idea_sse2/idea_sse2.cpp 4
1 files changed, 3 insertions, 1 deletions
diff --git a/src/block/idea_sse2/idea_sse2.cpp b/src/block/idea_sse2/idea_sse2.cpp
index 81b0fd9c1..70698560d 100644
--- a/src/block/idea_sse2/idea_sse2.cpp
+++ b/src/block/idea_sse2/idea_sse2.cpp
@@ -28,7 +28,9 @@ inline __m128i mul(__m128i X, u16bit K_16)
__m128i T = _mm_sub_epi16(mul_lo, mul_hi);
// Unsigned compare; cmp = 1 if mul_lo < mul_hi else 0
- const __m128i cmp = _mm_min_epu8(ones, _mm_subs_epu16(mul_hi, mul_lo));
+ const __m128i subs = _mm_subs_epu16(mul_hi, mul_lo);
+ const __m128i cmp = _mm_min_epu8(
+ _mm_or_si128(subs, _mm_srli_epi16(subs, 8)), ones);
T = _mm_add_epi16(T, cmp);