diff options
Diffstat (limited to 'src/lib/block/idea_sse2/idea_sse2.cpp')
-rw-r--r-- | src/lib/block/idea_sse2/idea_sse2.cpp | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/src/lib/block/idea_sse2/idea_sse2.cpp b/src/lib/block/idea_sse2/idea_sse2.cpp index 4debfc95a..1e63a8332 100644 --- a/src/lib/block/idea_sse2/idea_sse2.cpp +++ b/src/lib/block/idea_sse2/idea_sse2.cpp @@ -13,6 +13,7 @@ namespace Botan { namespace { +BOTAN_FUNC_ISA("sse2") inline __m128i mul(__m128i X, u16bit K_16) { const __m128i zeros = _mm_set1_epi16(0); @@ -61,6 +62,7 @@ inline __m128i mul(__m128i X, u16bit K_16) * that extra unpack could easily save 3-4 cycles per block, and would * also help a lot with register pressure on 32-bit x86 */ +BOTAN_FUNC_ISA("sse2") void transpose_in(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3) { __m128i T0 = _mm_unpackhi_epi32(B0, B1); @@ -97,6 +99,7 @@ void transpose_in(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3) /* * 4x8 matrix transpose (reverse) */ +BOTAN_FUNC_ISA("sse2") void transpose_out(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3) { __m128i T0 = _mm_unpacklo_epi64(B0, B1); @@ -130,6 +133,7 @@ void transpose_out(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3) /* * 8 wide IDEA encryption/decryption in SSE2 */ +BOTAN_FUNC_ISA("sse2") void IDEA::sse2_idea_op_8(const byte in[64], byte out[64], const u16bit EK[52]) const { CT::poison(in, 64); |