aboutsummaryrefslogtreecommitdiffstats
path: root/src/block/idea_sse2
diff options
context:
space:
mode:
author: lloyd <[email protected]> 2011-05-12 17:56:08 +0000
committer: lloyd <[email protected]> 2011-05-12 17:56:08 +0000
commit: ae7868f4985ce52cefeb5864715b53ecee78f740 (patch)
tree: fb33fad8be4cbc98be6ad604249aada897f345f1 /src/block/idea_sse2
parent: 44aa55d0aeb96d452450df5c024db7275a560925 (diff)
Fix the problem that prevented the SSE2 IDEA implementation from working correctly under Clang: the technique used to emulate an unsigned compare relied on signed overflow. The new method does not, and it works under GCC, ICC, and Clang. Even better, the compare now takes only 2 instructions instead of 4.

Prevent using any of the asm implementations under Clang on x86-32. All of them crash under Clang 2.9; the cause is unclear.
Diffstat (limited to 'src/block/idea_sse2')
-rw-r--r-- src/block/idea_sse2/idea_sse2.cpp 6
1 file changed, 1 insertion, 5 deletions
diff --git a/src/block/idea_sse2/idea_sse2.cpp b/src/block/idea_sse2/idea_sse2.cpp
index b92f51ac3..81b0fd9c1 100644
--- a/src/block/idea_sse2/idea_sse2.cpp
+++ b/src/block/idea_sse2/idea_sse2.cpp
@@ -16,7 +16,6 @@ inline __m128i mul(__m128i X, u16bit K_16)
{
const __m128i zeros = _mm_set1_epi16(0);
const __m128i ones = _mm_set1_epi16(1);
- const __m128i high_bit = _mm_set1_epi16(-32767); // 0x8000
const __m128i K = _mm_set1_epi16(K_16);
@@ -29,10 +28,7 @@ inline __m128i mul(__m128i X, u16bit K_16)
__m128i T = _mm_sub_epi16(mul_lo, mul_hi);
// Unsigned compare; cmp = 1 if mul_lo < mul_hi else 0
- const __m128i cmp = _mm_srli_epi16(_mm_cmpgt_epi16(
- _mm_add_epi16(mul_hi, high_bit),
- _mm_add_epi16(mul_lo, high_bit)),
- 15);
+ const __m128i cmp = _mm_min_epu8(ones, _mm_subs_epu16(mul_hi, mul_lo));
T = _mm_add_epi16(T, cmp);