aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlloyd <[email protected]>2011-05-13 15:14:10 +0000
committerlloyd <[email protected]>2011-05-13 15:14:10 +0000
commit3ead4139654e745f3b86db5f49dde8636909e511 (patch)
tree1f454b33bcbb8cb4a4b3311d8b38e969fe10d65c
parentc7259bfc91e1fd5c9db642e59d13dd5b835517cd (diff)
The new method of doing comparisons did not work all of the time: if
the low bytes of the two values were equal, then the low byte of the saturating subtraction result would be 0 even though the high byte contained a non-zero value. To deal with this, shift the high byte down and OR it with the low byte, so that the low byte is non-zero whenever either byte is. Add some new tests which exercise the SIMD implementation more carefully, including values that trigger the problem in the earlier version.
-rw-r--r--checks/validate.dat82
-rw-r--r--src/block/idea_sse2/idea_sse2.cpp4
2 files changed, 85 insertions, 1 deletions
diff --git a/checks/validate.dat b/checks/validate.dat
index 2e060abe3..6f264ea74 100644
--- a/checks/validate.dat
+++ b/checks/validate.dat
@@ -4748,6 +4748,8 @@ D6A025B07C037A6E1E0653E828FB9E3A3587CDDA5325D4DAA743D113D995D6AF
E07306086FA442A42B107F7F355359DD972BF070C0C71FF5C37FA7C259C7E039
[IDEA]
+7409000000000000:E18315C171B83765:ED1BCC9E9267925F3132BA3A8CF9B764
+
D53FABBF94FF8B5F:1D0CB2AF1654820A:729A27ED8F5C3E8BAF16560D14C90B43
848F836780938169:D7E0468226D0FC56:729A27ED8F5C3E8BAF16560D14C90B43
819440CA2065D112:264A8BBA66959075:729A27ED8F5C3E8BAF16560D14C90B43
@@ -4775,6 +4777,86 @@ FAE6D2BEAA96826E0A141E28323C4650050A0F14191E2328050A0F14191E2328:\
85DF52005608193D2F7DE750212FB7347B7314925DE59C097B7314925DE59C09:\
00010002000300040005000600070008
+C309000000000000A02A000000000000B03D000000000000C942000000000000\
+2B4C000000000000A04E0000000000009857000000000000C860000000000000\
+0063000000000000F2660000000000008698000000000000729D000000000000\
+34A000000000000023A500000000000010AE00000000000025AE000000000000\
+30D600000000000064DB000000000000BCE1000000000000F6E7000000000000\
+4AEC00000000000080F9000000000000E0FE00000000000061FF000000000000:\
+9C3C4F44BB50DF7367DAD70E6FED04E0AEB0344116C6E41F66A1A304E822132D\
+8AFCC1727259D93DD6E742EAEF2FD8C03EAD7890DC4EFACBB8776F3439A3DB1B\
+55D47DC6BC4A43349BA9E85FE178CD1ADBDD4E9D19CA1E7659341251586E1386\
+4A8C4E93A2616A0C18890A622452AD9FD09CB1A9CDC83ABF2FCFA325FA011731\
+9C924852D426132D05DA82EEBC3C261A6036C6477FBE3F65C40B8B02C2F9D8C8\
+B3084034AB3873CF22F20759C145ECCE92CE6B557D6DB959DA0B8AD4E0DFBCEA:\
+F2022315280960F16FD09741D13F693A
+
+7CC254F81BE8E78D765A2E63339FC99A66320DB73158A35A255D051758E95ED4\
+ABB2CDC69BB454110E827441213DDC8770E93EA141E1FC673E017E97EADC6B96\
+8F385C2AECB03BFB32AF3C54EC18DB5C021AFE43FBFAAA3AFB29D1E6053C7C94\
+75D8BE6189F95CBBA8990F95B1EBF1B305EFF700E9A13AE5CA0BCBD0484764BD\
+1F231EA81C7B64C514735AC55E4B79633B706424119E09DCAAD4ACF21B10AF3B\
+33CDE3504847155CBB6F2219BA9B7DF50BE11A1C7F23F829F8A41B13B5CA4EE8\
+983238E0794D3D34BC5F4E77FACB6C05AC86212BAA1A55A2BE70B5733B045CD3\
+3694B3AFE2F0E49E4F321549FD824EA90870D4B28A2954489A0ABCD50E18A844\
+AC5BF38E4CD72D9B0942E506C433AFCDA3847F2DADD47647DE321CEC4AC430F6\
+2023856CFBB20704F4EC0BB920BA86C33E05F1ECD96733B79950A3E314D3D934\
+F75EA0F210A8F6059401BEB4BC4478FA4969E623D01ADA696A7E4C7E5125B348\
+84533A94FB319990325744EE9BBCE9E525CF08F5E9E25E5360AAD2B2D085FA54\
+D835E8D466826498D9A8877565705A8A3F62802944DE7CA5894E5759D351ADAC\
+869580EC17E485F18C0C66F17CC07CBB22FCE466DA610B63AF62BC83B4692F3A\
+FFAF271693AC071FB86D11342D8DEF4F89D4B66335C1C7E4248367D8ED9612EC\
+453902D8E50AF89D7709D1A596C1F41F95AA82CA6C49AE90CD1668BAAC7AA6F2\
+B4A8CA99B2C2372ACB08CF61C9C3805E6E0328DA4CD76A19EDD2D3994C798B00\
+22569AD418D1FEE4D9CD45A391C601FFC92AD91501432FEE150287617C13629E\
+69FC7281CD7165A63EAB49CF714BCE3A75A74F76EA7E64FF81EB61FDFEC39B67\
+BF0DE98C7E4E32BDF97C8C6AC75BA43C02F4B2ED7216ECF3014DF000108B67CF\
+99505B179F8ED4980A6103D1BCA70DBE9BBFAB0ED59801D6E5F2D6F67D3EC516\
+8E212E2DAF02C6B963C98A1F7097DE0C56891A2B211B01070DD8FD8B16C2A1A4\
+E3CFD292D2984B3561D555D16C33DDC2BCF7EDDE13EFE520C7E2ABDDA44D8188\
+1C531AEEEB66244C3B791EA8ACFB6A68F3584606472B260E0DD2EBB21F6C3A3B\
+C0542AABBA4EF8F6C7169E731108DB0460220AA74D31B55B03A00D220D475DCD\
+9B877856D5704C9C86EA0F98F2EB9C530DA7FA5AD8B0B5DB50C2FD5D095A2AA5\
+E2A3FBB71347549A316332234ECE765B7571B64D216B28712E25CF3780F9DC62\
+9CD719B01E6D4A4FD17C731F4AE97BC05A310D7B9C36EDCA5BBC02DBB5DE3D52\
+B65702D4C44C2495C897B5128030D2DB61E056FD1643C871FFCA4DB5A88A075E\
+E10933A655573B1DEEF02F6E20024981E2A07FF8E34769E311B698B9419F1822\
+A84BC8FDA2041A90F449FE154B48962DE81525CB5C8FAE6D45462786E53FA98D\
+8A718A2C75A4BC6AEEBA7F39021567EA2B8CB6871B64F561AB1CE7905B901EE5:\
+9C142A22EDF81444F47272B80A037C169E304393537CECE8003BD80F7B054406\
+3B4A141F9A99D3C6820BAD98BECD914804F389EB2A50E1E2CF22161FC78B9366\
+0E07E2686E70AC0715299C4796F3559FDA802E61CB4ABBF42BAE516BD09FA410\
+085A0A92C6F32A3797D19808D3B3D049B605852E970E5A1B8031D3DC34B5A273\
+F54ED35E21D780204F4B3C512596237153BE9FAF74A44E9A9DCBE96D628AA58B\
+1E3363A94DF540230B38A1ACA440432640E5387D92F1CC1A16F8628A4CB6229F\
+513AB926300668CF97B27643C9C9D0C3030D0CDFBBCB69C3DB199E5D392A97A5\
+1DE6C9881AE5612A69FA0EA026F2F254B929201AFB3AFC8D977C3ED6E12F0118\
+92037D0F49B0144E07A0F0556F0BAC9B3F829C233265439AF711E0B5DD6EC813\
+FD51281E8AA6F031B096C64EE8F03E041FE4DC6B5441141F2D4A308CE8EA77C6\
+483E3CF565EC49CF27A0B13F28D3C63AD7FB6B3A96579D30C9D65F7BA86E56DA\
+6D14AF3C7D170CB5BF5F21C70C1771354DA2850CFF8D9250273828C1FE60C4AC\
+086049404E3D63E04935F03B057B4783B13CF49757A8B5ABB3D2E37E54B881D2\
+36F7DF7FE80E4AE33E9125F54AA96D96BFB15607F0800B215CBF9BB0F7E29080\
+D8504E9BC1F78256593B9565E5AA5FA22032A47041B453D1B154A8D24CD59CF9\
+AA6A8E55363F3DF2B6307ABA5134D67B0DF0AE4FE77F23BF7DF8504FE9DC7F32\
+A8562E2DF585E639847DD624E55B0D0DCCDA72D0F1E072D82D4BC135DC5F7F91\
+30956D401FAB1456527FE087A436C1511CDFEA58202D200E1817E360E8400AAE\
+83B073A63596B033D7E83C6CAB7FDD7069C3B1718EAF60B937CD2458255E68FC\
+D9514FD14AA6E27EC76E75F95F0A678A0F64D49C1B9B8F8DA56DDB8CE640FF6E\
+7195F4A679165F9996F3DDF992E3CB4ED9E9084AFC0038E4BEFB467CC8170AF8\
+F004082BBCB137BBD45C124BE8CEDC89DD565A24830889CE4B9781FC18803BDA\
+1A0A4EB70DA35887B02F18CFF9329E2B7C31B0F5F0648E0508379B52C8FF91CE\
+F939A040A8C20F2F27ED65553680729A2181B3B3C4AA02BFF8DF0A9228A87BBF\
+52B48F473D0F9070C76E4DB6F09FFDFEB629BD0E1944B7016AF34187E2985AEA\
+E30D6480A58F649A0C858E1F1458388A9E822A306AC1AA7465882DE78F242EF7\
+B0CB45D68A057F00D8609587922C8FAD6F1A7FFA34BF2175FC516730A61CF82C\
+6F866C978CC292BCC1F91E6AF1785FCDAA9A43A01E6AEE91E222F8AF8C989F2A\
+4C50B7A1D45BC15E11E5E6E6EF720506B8DF564648BEBFE272C0A77D41295865\
+108150CDB3620970A37DB94F1CC35E434DC33434D99871F6141EB57C9E648AD1\
+BF70E2B7FCEB81EA871DD92F19C366EA532CA4A7BEF9242128B7ADDD308B58FF\
+F5594CB4156A03C6A6ED3F27E8DB20FB2F4208422B7E9E0A4E63A0122560CFBC:\
+67C6697351FF4AEC29CDBAABF2FBE346
+
# Randomly generated by OpenSSL
A1F4C5FC0AF894FB:1F88AD254A1653CB:69E2F555209FCA21ED36E0243F043537
55E31A38B2C91116:8D57CB7AFB401E55:BC0ED7C4A90FE4760B3D971F0F2589F6
diff --git a/src/block/idea_sse2/idea_sse2.cpp b/src/block/idea_sse2/idea_sse2.cpp
index 81b0fd9c1..70698560d 100644
--- a/src/block/idea_sse2/idea_sse2.cpp
+++ b/src/block/idea_sse2/idea_sse2.cpp
@@ -28,7 +28,9 @@ inline __m128i mul(__m128i X, u16bit K_16)
__m128i T = _mm_sub_epi16(mul_lo, mul_hi);
// Unsigned compare; cmp = 1 if mul_lo < mul_hi else 0
- const __m128i cmp = _mm_min_epu8(ones, _mm_subs_epu16(mul_hi, mul_lo));
+ const __m128i subs = _mm_subs_epu16(mul_hi, mul_lo);
+ const __m128i cmp = _mm_min_epu8(
+ _mm_or_si128(subs, _mm_srli_epi16(subs, 8)), ones);
T = _mm_add_epi16(T, cmp);