diff options
author | lloyd <[email protected]> | 2011-05-13 15:14:10 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2011-05-13 15:14:10 +0000 |
commit | 3ead4139654e745f3b86db5f49dde8636909e511 (patch) | |
tree | 1f454b33bcbb8cb4a4b3311d8b38e969fe10d65c | |
parent | c7259bfc91e1fd5c9db642e59d13dd5b835517cd (diff) |
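The new method of doing comparisons did not work all of the time: if
the low bytes of the two 16-bit operands were equal, then the low byte
of the saturating subtraction result would be 0 while the high byte
contained a non-zero value, and the byte-wise minimum against 1 then
produced 0 instead of 1.
To deal with this, shift the high byte of the subtraction result down
and OR it with the low byte, so that the low byte is non-zero whenever
the 16-bit difference is non-zero.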
Add some new tests which exercise the SIMD implementation more
carefully, including values that trigger the problem in the earlier
version.
-rw-r--r-- | checks/validate.dat | 82 | ||||
-rw-r--r-- | src/block/idea_sse2/idea_sse2.cpp | 4 |
2 files changed, 85 insertions, 1 deletions
diff --git a/checks/validate.dat b/checks/validate.dat index 2e060abe3..6f264ea74 100644 --- a/checks/validate.dat +++ b/checks/validate.dat @@ -4748,6 +4748,8 @@ D6A025B07C037A6E1E0653E828FB9E3A3587CDDA5325D4DAA743D113D995D6AF E07306086FA442A42B107F7F355359DD972BF070C0C71FF5C37FA7C259C7E039 [IDEA] +7409000000000000:E18315C171B83765:ED1BCC9E9267925F3132BA3A8CF9B764 + D53FABBF94FF8B5F:1D0CB2AF1654820A:729A27ED8F5C3E8BAF16560D14C90B43 848F836780938169:D7E0468226D0FC56:729A27ED8F5C3E8BAF16560D14C90B43 819440CA2065D112:264A8BBA66959075:729A27ED8F5C3E8BAF16560D14C90B43 @@ -4775,6 +4777,86 @@ FAE6D2BEAA96826E0A141E28323C4650050A0F14191E2328050A0F14191E2328:\ 85DF52005608193D2F7DE750212FB7347B7314925DE59C097B7314925DE59C09:\ 00010002000300040005000600070008 +C309000000000000A02A000000000000B03D000000000000C942000000000000\ +2B4C000000000000A04E0000000000009857000000000000C860000000000000\ +0063000000000000F2660000000000008698000000000000729D000000000000\ +34A000000000000023A500000000000010AE00000000000025AE000000000000\ +30D600000000000064DB000000000000BCE1000000000000F6E7000000000000\ +4AEC00000000000080F9000000000000E0FE00000000000061FF000000000000:\ +9C3C4F44BB50DF7367DAD70E6FED04E0AEB0344116C6E41F66A1A304E822132D\ +8AFCC1727259D93DD6E742EAEF2FD8C03EAD7890DC4EFACBB8776F3439A3DB1B\ +55D47DC6BC4A43349BA9E85FE178CD1ADBDD4E9D19CA1E7659341251586E1386\ +4A8C4E93A2616A0C18890A622452AD9FD09CB1A9CDC83ABF2FCFA325FA011731\ +9C924852D426132D05DA82EEBC3C261A6036C6477FBE3F65C40B8B02C2F9D8C8\ +B3084034AB3873CF22F20759C145ECCE92CE6B557D6DB959DA0B8AD4E0DFBCEA:\ +F2022315280960F16FD09741D13F693A + +7CC254F81BE8E78D765A2E63339FC99A66320DB73158A35A255D051758E95ED4\ +ABB2CDC69BB454110E827441213DDC8770E93EA141E1FC673E017E97EADC6B96\ +8F385C2AECB03BFB32AF3C54EC18DB5C021AFE43FBFAAA3AFB29D1E6053C7C94\ +75D8BE6189F95CBBA8990F95B1EBF1B305EFF700E9A13AE5CA0BCBD0484764BD\ +1F231EA81C7B64C514735AC55E4B79633B706424119E09DCAAD4ACF21B10AF3B\ 
+33CDE3504847155CBB6F2219BA9B7DF50BE11A1C7F23F829F8A41B13B5CA4EE8\ +983238E0794D3D34BC5F4E77FACB6C05AC86212BAA1A55A2BE70B5733B045CD3\ +3694B3AFE2F0E49E4F321549FD824EA90870D4B28A2954489A0ABCD50E18A844\ +AC5BF38E4CD72D9B0942E506C433AFCDA3847F2DADD47647DE321CEC4AC430F6\ +2023856CFBB20704F4EC0BB920BA86C33E05F1ECD96733B79950A3E314D3D934\ +F75EA0F210A8F6059401BEB4BC4478FA4969E623D01ADA696A7E4C7E5125B348\ +84533A94FB319990325744EE9BBCE9E525CF08F5E9E25E5360AAD2B2D085FA54\ +D835E8D466826498D9A8877565705A8A3F62802944DE7CA5894E5759D351ADAC\ +869580EC17E485F18C0C66F17CC07CBB22FCE466DA610B63AF62BC83B4692F3A\ +FFAF271693AC071FB86D11342D8DEF4F89D4B66335C1C7E4248367D8ED9612EC\ +453902D8E50AF89D7709D1A596C1F41F95AA82CA6C49AE90CD1668BAAC7AA6F2\ +B4A8CA99B2C2372ACB08CF61C9C3805E6E0328DA4CD76A19EDD2D3994C798B00\ +22569AD418D1FEE4D9CD45A391C601FFC92AD91501432FEE150287617C13629E\ +69FC7281CD7165A63EAB49CF714BCE3A75A74F76EA7E64FF81EB61FDFEC39B67\ +BF0DE98C7E4E32BDF97C8C6AC75BA43C02F4B2ED7216ECF3014DF000108B67CF\ +99505B179F8ED4980A6103D1BCA70DBE9BBFAB0ED59801D6E5F2D6F67D3EC516\ +8E212E2DAF02C6B963C98A1F7097DE0C56891A2B211B01070DD8FD8B16C2A1A4\ +E3CFD292D2984B3561D555D16C33DDC2BCF7EDDE13EFE520C7E2ABDDA44D8188\ +1C531AEEEB66244C3B791EA8ACFB6A68F3584606472B260E0DD2EBB21F6C3A3B\ +C0542AABBA4EF8F6C7169E731108DB0460220AA74D31B55B03A00D220D475DCD\ +9B877856D5704C9C86EA0F98F2EB9C530DA7FA5AD8B0B5DB50C2FD5D095A2AA5\ +E2A3FBB71347549A316332234ECE765B7571B64D216B28712E25CF3780F9DC62\ +9CD719B01E6D4A4FD17C731F4AE97BC05A310D7B9C36EDCA5BBC02DBB5DE3D52\ +B65702D4C44C2495C897B5128030D2DB61E056FD1643C871FFCA4DB5A88A075E\ +E10933A655573B1DEEF02F6E20024981E2A07FF8E34769E311B698B9419F1822\ +A84BC8FDA2041A90F449FE154B48962DE81525CB5C8FAE6D45462786E53FA98D\ +8A718A2C75A4BC6AEEBA7F39021567EA2B8CB6871B64F561AB1CE7905B901EE5:\ +9C142A22EDF81444F47272B80A037C169E304393537CECE8003BD80F7B054406\ +3B4A141F9A99D3C6820BAD98BECD914804F389EB2A50E1E2CF22161FC78B9366\ 
+0E07E2686E70AC0715299C4796F3559FDA802E61CB4ABBF42BAE516BD09FA410\ +085A0A92C6F32A3797D19808D3B3D049B605852E970E5A1B8031D3DC34B5A273\ +F54ED35E21D780204F4B3C512596237153BE9FAF74A44E9A9DCBE96D628AA58B\ +1E3363A94DF540230B38A1ACA440432640E5387D92F1CC1A16F8628A4CB6229F\ +513AB926300668CF97B27643C9C9D0C3030D0CDFBBCB69C3DB199E5D392A97A5\ +1DE6C9881AE5612A69FA0EA026F2F254B929201AFB3AFC8D977C3ED6E12F0118\ +92037D0F49B0144E07A0F0556F0BAC9B3F829C233265439AF711E0B5DD6EC813\ +FD51281E8AA6F031B096C64EE8F03E041FE4DC6B5441141F2D4A308CE8EA77C6\ +483E3CF565EC49CF27A0B13F28D3C63AD7FB6B3A96579D30C9D65F7BA86E56DA\ +6D14AF3C7D170CB5BF5F21C70C1771354DA2850CFF8D9250273828C1FE60C4AC\ +086049404E3D63E04935F03B057B4783B13CF49757A8B5ABB3D2E37E54B881D2\ +36F7DF7FE80E4AE33E9125F54AA96D96BFB15607F0800B215CBF9BB0F7E29080\ +D8504E9BC1F78256593B9565E5AA5FA22032A47041B453D1B154A8D24CD59CF9\ +AA6A8E55363F3DF2B6307ABA5134D67B0DF0AE4FE77F23BF7DF8504FE9DC7F32\ +A8562E2DF585E639847DD624E55B0D0DCCDA72D0F1E072D82D4BC135DC5F7F91\ +30956D401FAB1456527FE087A436C1511CDFEA58202D200E1817E360E8400AAE\ +83B073A63596B033D7E83C6CAB7FDD7069C3B1718EAF60B937CD2458255E68FC\ +D9514FD14AA6E27EC76E75F95F0A678A0F64D49C1B9B8F8DA56DDB8CE640FF6E\ +7195F4A679165F9996F3DDF992E3CB4ED9E9084AFC0038E4BEFB467CC8170AF8\ +F004082BBCB137BBD45C124BE8CEDC89DD565A24830889CE4B9781FC18803BDA\ +1A0A4EB70DA35887B02F18CFF9329E2B7C31B0F5F0648E0508379B52C8FF91CE\ +F939A040A8C20F2F27ED65553680729A2181B3B3C4AA02BFF8DF0A9228A87BBF\ +52B48F473D0F9070C76E4DB6F09FFDFEB629BD0E1944B7016AF34187E2985AEA\ +E30D6480A58F649A0C858E1F1458388A9E822A306AC1AA7465882DE78F242EF7\ +B0CB45D68A057F00D8609587922C8FAD6F1A7FFA34BF2175FC516730A61CF82C\ +6F866C978CC292BCC1F91E6AF1785FCDAA9A43A01E6AEE91E222F8AF8C989F2A\ +4C50B7A1D45BC15E11E5E6E6EF720506B8DF564648BEBFE272C0A77D41295865\ +108150CDB3620970A37DB94F1CC35E434DC33434D99871F6141EB57C9E648AD1\ +BF70E2B7FCEB81EA871DD92F19C366EA532CA4A7BEF9242128B7ADDD308B58FF\ 
+F5594CB4156A03C6A6ED3F27E8DB20FB2F4208422B7E9E0A4E63A0122560CFBC:\ +67C6697351FF4AEC29CDBAABF2FBE346 + # Randomly generated by OpenSSL A1F4C5FC0AF894FB:1F88AD254A1653CB:69E2F555209FCA21ED36E0243F043537 55E31A38B2C91116:8D57CB7AFB401E55:BC0ED7C4A90FE4760B3D971F0F2589F6 diff --git a/src/block/idea_sse2/idea_sse2.cpp b/src/block/idea_sse2/idea_sse2.cpp index 81b0fd9c1..70698560d 100644 --- a/src/block/idea_sse2/idea_sse2.cpp +++ b/src/block/idea_sse2/idea_sse2.cpp @@ -28,7 +28,9 @@ inline __m128i mul(__m128i X, u16bit K_16) __m128i T = _mm_sub_epi16(mul_lo, mul_hi); // Unsigned compare; cmp = 1 if mul_lo < mul_hi else 0 - const __m128i cmp = _mm_min_epu8(ones, _mm_subs_epu16(mul_hi, mul_lo)); + const __m128i subs = _mm_subs_epu16(mul_hi, mul_lo); + const __m128i cmp = _mm_min_epu8( + _mm_or_si128(subs, _mm_srli_epi16(subs, 8)), ones); T = _mm_add_epi16(T, cmp); |