diff options
author | lloyd <[email protected]> | 2013-12-24 22:24:54 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2013-12-24 22:24:54 +0000 |
commit | bb9ca87f6bd9383cf27de115281cc9ea54284d3a (patch) | |
tree | 944c0e69b3387f04d60c1290c15ae7bbe719e5fe | |
parent | 32233249f38b7fcbce53699ab23c899352e17449 (diff) |
Add Threefish-512 in AVX2
Tested using Intel's SDE, but no idea how fast it will be on hardware.
-rw-r--r-- | checks/tests.cpp | 2 | ||||
-rw-r--r-- | checks/transform.cpp | 14 | ||||
-rw-r--r-- | checks/transform.vec | 6 | ||||
-rw-r--r-- | doc/relnotes/1_11_6.rst | 16 | ||||
-rw-r--r-- | src/block/threefish/info.txt | 2 | ||||
-rw-r--r-- | src/block/threefish/threefish.cpp | 72 | ||||
-rw-r--r-- | src/block/threefish/threefish.h | 7 | ||||
-rw-r--r-- | src/block/threefish_avx2/info.txt | 3 | ||||
-rw-r--r-- | src/block/threefish_avx2/threefish_avx2.cpp | 146 | ||||
-rw-r--r-- | src/block/threefish_avx2/threefish_avx2.h | 26 |
10 files changed, 252 insertions, 42 deletions
diff --git a/checks/tests.cpp b/checks/tests.cpp index db934735e..6da04307c 100644 --- a/checks/tests.cpp +++ b/checks/tests.cpp @@ -64,7 +64,7 @@ void run_tests(std::istream& src, const std::string got = cb(vars); if(got != vars[output_key]) { - std::cout << name_key << " got " << got + std::cout << name_key << ' ' << vars[name_key] << " got " << got << " expected " << vars[output_key] << std::endl; return false; } diff --git a/checks/transform.cpp b/checks/transform.cpp index 71ecc35e7..cfcc5bb51 100644 --- a/checks/transform.cpp +++ b/checks/transform.cpp @@ -15,16 +15,18 @@ namespace { Transformation* get_transform(const std::string& algo) { +#if defined(BOTAN_HAS_THREEFISH_512) if(algo == "Threefish-512") return new Threefish_512; +#endif throw std::runtime_error("Unknown transform " + algo); } secure_vector<byte> transform_test(const std::string& algo, - const secure_vector<byte>& nonce, - const secure_vector<byte>& key, - const secure_vector<byte>& in) + const secure_vector<byte>& nonce, + const secure_vector<byte>& key, + const secure_vector<byte>& in) { std::unique_ptr<Transformation> transform(get_transform(algo)); @@ -52,7 +54,11 @@ void test_transform() hex_decode_locked(m["Input"]))); }); - //time_transform("Threefish-512"); + if(true) + { + time_transform("Threefish-512"); + //time_transform("Threefish-512-AVX2"); + } } void time_transform(const std::string& algo) diff --git a/checks/transform.vec b/checks/transform.vec index 8f8a155e0..7399fb641 100644 --- a/checks/transform.vec +++ b/checks/transform.vec @@ -10,3 +10,9 @@ Input = FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDC Nonce = 000102030405060708090A0B0C0D0E0F Key = 101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F Output = E304439626D45A2CB401CAD8D636249A6338330EB06D45DD8B36B90E97254779272A0A8D99463504784420EA18C9A725AF11DFFEA10162348927673D5C1CAF3D + +Transform = Threefish-512 +Input = FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C0FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C1FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C2FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C3FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C4FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C5FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C6FFFEFDFCFBFAF9F8F7F6F5F4F3F2F1F0EFEEEDECEBEAE9E8E7E6E5E4E3E2E1E0DFDEDDDCDBDAD9D8D7D6D5D4D3D2D1D0CFCECDCCCBCAC9C8C7C6C5C4C3C2C1C7 +Nonce = 000102030405060708090A0B0C0D0E0F +Key = 101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F +Output = E304439626D45A2CB401CAD8D636249A6338330EB06D45DD8B36B90E97254779272A0A8D99463504784420EA18C9A725AF11DFFEA10162348927673D5C1CAF3DEADB79B45750412A48DA8F29648E51C7DC6B6F1FBC7F9EAE86B200C4D757CF80A2DEDC32EC0C4294CB4715A38B55433238C3E8DB022BF2EAAC20369745D240F2030A3FBD7262ECD63DFD1FF67304CA958C4DCCDA484705B8D0A9DB6FE5ECB774D8506DF74A6103A56F311B0B1A38E7F7CE694825AC2141059FF81F013EF816A070C8511C219FDEE1F9B712475CA1E6D8737A880A15824025FAECDFA946129D66EEFF349B735DA1B28E24AD703671257C1FE94A927CA307750BFB83D8E8E2AC1AED9BDAA2CF8E737994552A65F1488BEDD7995EAD3E2BB1F83372B998516C5E0AD3C4F65FF0A832D6AC866D800D55837E7F054C37BD93C526A41D3C665DD513D8D7D4DE2DD8F5060CC7F6CEDA565B39398392FC53EC95B55FA755FA96D2585C8E21D26694F8DF5D839C462F5ACAB59610B90FB2B10858D29DE8336813DB3FE24C46CEA7E56574F9BE0D348B01FE3AF29D40B360838BF9DA4B28AC63F385EE05880A6A49A8E384E08656F7D3AEB5407658A27278923C56FDA8CE8F269A24776132B98AE9CEF3C02A0AC97D584913AD5C94B51B5256E9B790D083ACBB212DC6DA264B4E16AB60485F93A9226676988BF3307FD1083032FC10BEC1DA051757AB425C diff --git a/doc/relnotes/1_11_6.rst b/doc/relnotes/1_11_6.rst index 434d8c499..fbb6ca8bb 100644 --- a/doc/relnotes/1_11_6.rst +++ b/doc/relnotes/1_11_6.rst @@ -6,14 +6,16 @@ Version 1.11.6, Not Yet Released decoding if the input was too large to have been created for the associated key. - * Botan now requires Boost, specifically the filesystem and asio libraries. +* Botan now requires Boost, specifically the filesystem and asio libraries. - * The default TLS policy no longer includes RC4 in the cipher list, and - refuses to negotation SSLv3 by default. +* Add Threefish-512, including an implementation using AVX2 - * Add HKDF from :rfc:`5869` +* Add HKDF from :rfc:`5869` - * Add SIV from :rfc:`5297` +* Add SIV from :rfc:`5297` + +* TLS::Session_Manager_In_Memory's constructor now an rng reference argument + +* The default TLS policy no longer includes RC4 in the cipher list, and + refuses to negotation SSLv3 by default. - * TLS::Session_Manager_In_Memory now requires a rng to be passed to its - constructor. diff --git a/src/block/threefish/info.txt b/src/block/threefish/info.txt index 91fc7bfb1..b352a49df 100644 --- a/src/block/threefish/info.txt +++ b/src/block/threefish/info.txt @@ -1 +1 @@ -define THREEFISH 20131209 +define THREEFISH_512 20131220 diff --git a/src/block/threefish/threefish.cpp b/src/block/threefish/threefish.cpp index 6fb65a37e..ef639f03a 100644 --- a/src/block/threefish/threefish.cpp +++ b/src/block/threefish/threefish.cpp @@ -25,21 +25,22 @@ secure_vector<byte> Threefish_512::start(const byte tweak[], size_t tweak_len) return secure_vector<byte>(); } -void Threefish_512::update(secure_vector<byte>& blocks, size_t offset) +void Threefish_512::full_inplace_update(byte* buf, size_t sz) { - byte* buf = &blocks[offset]; - size_t sz = blocks.size() - offset; - - BOTAN_ASSERT(sz % update_granularity() == 0, "Block sized input"); - - BOTAN_ASSERT(m_T.size() == 3, "Tweak was set"); - -#define THREEFISH_ROUND(I1,I2,I3,I4,I5,I6,I7,I8,ROT1,ROT2,ROT3,ROT4) \ +#define THREEFISH_ROUND(X0,X1,X2,X3,X4,X5,X6,X7,ROT1,ROT2,ROT3,ROT4) \ do { \ - X##I1 += X##I2; X##I2 = rotate_left(X##I2, ROT1) ^ X##I1; \ - X##I3 += X##I4; X##I4 = rotate_left(X##I4, ROT2) ^ X##I3; \ - X##I5 += X##I6; X##I6 = rotate_left(X##I6, ROT3) ^ X##I5; \ - X##I7 += X##I8; X##I8 = rotate_left(X##I8, ROT4) ^ X##I7; \ + X0 += X4; \ + X1 += X5; \ + X2 += X6; \ + X3 += X7; \ + X4 = rotate_left(X4, ROT1); \ + X5 = rotate_left(X5, ROT2); \ + X6 = rotate_left(X6, ROT3); \ + X7 = rotate_left(X7, ROT4); \ + X4 ^= X0; \ + X5 ^= X1; \ + X6 ^= X2; \ + X7 ^= X3; \ } while(0) #define THREEFISH_INJECT_KEY(r) \ @@ -54,21 +55,21 @@ void Threefish_512::update(secure_vector<byte>& blocks, size_t offset) X7 += m_K[(r+7) % 9] + (r); \ } while(0) -#define THREEFISH_8_ROUNDS(R1,R2) \ - do { \ - THREEFISH_ROUND(0,1,2,3,4,5,6,7, 46,36,19,37); \ - THREEFISH_ROUND(2,1,4,7,6,5,0,3, 33,27,14,42); \ - THREEFISH_ROUND(4,1,6,3,0,5,2,7, 17,49,36,39); \ - THREEFISH_ROUND(6,1,0,7,2,5,4,3, 44, 9,54,56); \ - \ - THREEFISH_INJECT_KEY(R1); \ - \ - THREEFISH_ROUND(0,1,2,3,4,5,6,7, 39,30,34,24); \ - THREEFISH_ROUND(2,1,4,7,6,5,0,3, 13,50,10,17); \ - THREEFISH_ROUND(4,1,6,3,0,5,2,7, 25,29,39,43); \ - THREEFISH_ROUND(6,1,0,7,2,5,4,3, 8,35,56,22); \ - \ - THREEFISH_INJECT_KEY(R2); \ +#define THREEFISH_8_ROUNDS(R1,R2) \ + do { \ + THREEFISH_ROUND(X0,X2,X4,X6, X1,X3,X5,X7, 46,36,19,37); \ + THREEFISH_ROUND(X2,X4,X6,X0, X1,X7,X5,X3, 33,27,14,42); \ + THREEFISH_ROUND(X4,X6,X0,X2, X1,X3,X5,X7, 17,49,36,39); \ + THREEFISH_ROUND(X6,X0,X2,X4, X1,X7,X5,X3, 44, 9,54,56); \ + \ + THREEFISH_INJECT_KEY(R1); \ + \ + THREEFISH_ROUND(X0,X2,X4,X6, X1,X3,X5,X7, 39,30,34,24); \ + THREEFISH_ROUND(X2,X4,X6,X0, X1,X7,X5,X3, 13,50,10,17); \ + THREEFISH_ROUND(X4,X6,X0,X2, X1,X3,X5,X7, 25,29,39,43); \ + THREEFISH_ROUND(X6,X0,X2,X4, X1,X7,X5,X3, 8,35,56,22); \ + \ + THREEFISH_INJECT_KEY(R2); \ } while(0) while(sz) @@ -105,6 +106,17 @@ void Threefish_512::update(secure_vector<byte>& blocks, size_t offset) #undef THREEFISH_ROUND } +void Threefish_512::update(secure_vector<byte>& buf, size_t offset) + { + BOTAN_ASSERT(m_K.size() == 9, "Key was set"); + BOTAN_ASSERT(m_T.size() == 3, "Tweak was set"); + + const size_t sz = buf.size() - offset; + BOTAN_ASSERT(sz % block_size() == 0, "Block sized input"); + + full_inplace_update(&buf[offset], sz); + } + Key_Length_Specification Threefish_512::key_spec() const { return Key_Length_Specification(64); @@ -138,7 +150,7 @@ size_t Threefish_512::output_length(size_t input_length) const size_t Threefish_512::update_granularity() const { - return 64; // single block + return block_size(); } size_t Threefish_512::minimum_final_size() const @@ -148,6 +160,7 @@ size_t Threefish_512::minimum_final_size() const size_t Threefish_512::default_nonce_length() const { + // todo: define encoding for smaller nonces return 16; } @@ -159,6 +172,7 @@ bool Threefish_512::valid_nonce_length(size_t nonce_len) const void Threefish_512::clear() { zeroise(m_K); + zeroise(m_T); } } diff --git a/src/block/threefish/threefish.h b/src/block/threefish/threefish.h index b7806a93d..0e13221cc 100644 --- a/src/block/threefish/threefish.h +++ b/src/block/threefish/threefish.h @@ -40,6 +40,13 @@ class BOTAN_DLL Threefish_512 : public Transformation void clear(); + size_t block_size() const { return 64; } + + protected: + const secure_vector<u64bit>& get_T() const { return m_T; } + const secure_vector<u64bit>& get_K() const { return m_K; } + + virtual void full_inplace_update(byte* buf, size_t sz); private: void key_schedule(const byte key[], size_t key_len) override; diff --git a/src/block/threefish_avx2/info.txt b/src/block/threefish_avx2/info.txt new file mode 100644 index 000000000..3f62629b9 --- /dev/null +++ b/src/block/threefish_avx2/info.txt @@ -0,0 +1,3 @@ +define THREEFISH_512_AVX2 20131224 + +need_isa avx2 diff --git a/src/block/threefish_avx2/threefish_avx2.cpp b/src/block/threefish_avx2/threefish_avx2.cpp new file mode 100644 index 000000000..f77cfee47 --- /dev/null +++ b/src/block/threefish_avx2/threefish_avx2.cpp @@ -0,0 +1,146 @@ +/* +* Threefish-512 +* (C) 2013 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include <botan/threefish_avx2.h> +#include <immintrin.h> + +namespace Botan { + +namespace { + +inline void interleave_epi64(__m256i& X0, __m256i& X1) + { + // interleave X0 and X1 qwords + // (X0,X1,X2,X3),(X4,X5,X6,X7) -> (X0,X2,X4,X6),(X1,X3,X5,X7) + + const __m256i T0 = _mm256_unpacklo_epi64(X0, X1); + const __m256i T1 = _mm256_unpackhi_epi64(X0, X1); + + X0 = _mm256_permute4x64_epi64(T0, _MM_SHUFFLE(3,1,2,0)); + X1 = _mm256_permute4x64_epi64(T1, _MM_SHUFFLE(3,1,2,0)); + } + +inline void deinterleave_epi64(__m256i& X0, __m256i& X1) + { + const __m256i T0 = _mm256_permute4x64_epi64(X0, _MM_SHUFFLE(3,1,2,0)); + const __m256i T1 = _mm256_permute4x64_epi64(X1, _MM_SHUFFLE(3,1,2,0)); + + X0 = _mm256_unpacklo_epi64(T0, T1); + X1 = _mm256_unpackhi_epi64(T0, T1); + } + +} + +void Threefish_512_AVX2::full_inplace_update(byte* buf, size_t sz) + { + const u64bit* K = &get_K()[0]; + const u64bit* T_64 = &get_T()[0]; + + const __m256i ROTATE_1 = _mm256_set_epi64x(37,19,36,46); + const __m256i ROTATE_2 = _mm256_set_epi64x(42,14,27,33); + const __m256i ROTATE_3 = _mm256_set_epi64x(39,36,49,17); + const __m256i ROTATE_4 = _mm256_set_epi64x(56,54, 9,44); + + const __m256i ROTATE_5 = _mm256_set_epi64x(24,34,30,39); + const __m256i ROTATE_6 = _mm256_set_epi64x(17,10,50,13); + const __m256i ROTATE_7 = _mm256_set_epi64x(43,39,29,25); + const __m256i ROTATE_8 = _mm256_set_epi64x(22,56,35, 8); + +#define THREEFISH_ROUND(X0,X1,SHL) \ + do { \ + const __m256i SHR = _mm256_sub_epi64(_mm256_set1_epi64x(64), SHL); \ + X0 = _mm256_add_epi64(X0, X1); \ + X1 = _mm256_or_si256(_mm256_sllv_epi64(X1, SHL), _mm256_srlv_epi64(X1, SHR)); \ + X1 = _mm256_xor_si256(X1, X0); \ + X0 = _mm256_permute4x64_epi64(X0, _MM_SHUFFLE(0, 3, 2, 1)); \ + X1 = _mm256_permute4x64_epi64(X1, _MM_SHUFFLE(1, 2, 3, 0)); \ + } while(0) + +#define THREEFISH_INJECT_KEY(r, K0, K1, T0I, T1I) \ + do { \ + const __m256i T0 = _mm256_permute4x64_epi64(T, _MM_SHUFFLE(T0I, 0, 0, 0)); \ + const __m256i T1 = _mm256_permute4x64_epi64(T, _MM_SHUFFLE(0, T1I, 0, 0)); \ + X0 = _mm256_add_epi64(X0, K0); \ + X1 = _mm256_add_epi64(X1, K1); \ + X1 = _mm256_add_epi64(X1, R); \ + R = _mm256_add_epi64(R, ONE); \ + X0 = _mm256_add_epi64(X0, T0); \ + X1 = _mm256_add_epi64(X1, T1); \ + } while(0) + +#define THREEFISH_8_ROUNDS(R1, R2, K1, K2, K3, T0, T1, T2) \ + do { \ + THREEFISH_ROUND(X0, X1, ROTATE_1); \ + THREEFISH_ROUND(X0, X1, ROTATE_2); \ + THREEFISH_ROUND(X0, X1, ROTATE_3); \ + THREEFISH_ROUND(X0, X1, ROTATE_4); \ + \ + THREEFISH_INJECT_KEY(R1, K1, K2, T0, T1); \ + \ + THREEFISH_ROUND(X0,X1, ROTATE_5); \ + THREEFISH_ROUND(X0,X1, ROTATE_6); \ + THREEFISH_ROUND(X0,X1, ROTATE_7); \ + THREEFISH_ROUND(X0,X1, ROTATE_8); \ + \ + THREEFISH_INJECT_KEY(R2, K2, K3, T2, T0); \ + } while(0) + + /* + v1.0 key schedule: 9 ymm registers (only need 2 or 3) + (0,1,2,3),(4,5,6,7) [8] + then mutating with vpermq + */ + const __m256i K0 = _mm256_set_epi64x(K[6], K[4], K[2], K[0]); + const __m256i K1 = _mm256_set_epi64x(K[7], K[5], K[3], K[1]); + const __m256i K2 = _mm256_set_epi64x(K[8], K[6], K[4], K[2]); + const __m256i K3 = _mm256_set_epi64x(K[0], K[7], K[5], K[3]); + const __m256i K4 = _mm256_set_epi64x(K[1], K[8], K[6], K[4]); + const __m256i K5 = _mm256_set_epi64x(K[2], K[0], K[7], K[5]); + const __m256i K6 = _mm256_set_epi64x(K[3], K[1], K[8], K[6]); + const __m256i K7 = _mm256_set_epi64x(K[4], K[2], K[0], K[7]); + const __m256i K8 = _mm256_set_epi64x(K[5], K[3], K[1], K[8]); + + const __m256i ONE = _mm256_set_epi64x(1, 0, 0, 0); + + while(sz >= 64) + { + __m256i* buf_mm = reinterpret_cast<__m256i*>(buf); + + __m256i X0 = _mm256_loadu_si256(buf_mm); + __m256i X1 = _mm256_loadu_si256(buf_mm + 1); + + __m256i T = _mm256_set_epi64x(T_64[0], T_64[1], T_64[2], 0); + + __m256i R = _mm256_set1_epi64x(0); + + interleave_epi64(X0, X1); + + THREEFISH_INJECT_KEY(0, K0, K1, 2, 3); + + THREEFISH_8_ROUNDS( 1, 2, K1,K2,K3, 1, 2, 3); + THREEFISH_8_ROUNDS( 3, 4, K3,K4,K5, 2, 3, 1); + THREEFISH_8_ROUNDS( 5, 6, K5,K6,K7, 3, 1, 2); + + THREEFISH_8_ROUNDS( 7, 8, K7,K8,K0, 1, 2, 3); + THREEFISH_8_ROUNDS( 9, 10, K0,K1,K2, 2, 3, 1); + THREEFISH_8_ROUNDS(11, 12, K2,K3,K4, 3, 1, 2); + + THREEFISH_8_ROUNDS(13, 14, K4,K5,K6, 1, 2, 3); + THREEFISH_8_ROUNDS(15, 16, K6,K7,K8, 2, 3, 1); + THREEFISH_8_ROUNDS(17, 18, K8,K0,K1, 3, 1, 2); + + deinterleave_epi64(X0, X1); + + _mm256_storeu_si256(buf_mm, X0); + _mm256_storeu_si256(buf_mm + 1, X1); + + buf += 64; + sz -= 64; + } + } + +} diff --git a/src/block/threefish_avx2/threefish_avx2.h b/src/block/threefish_avx2/threefish_avx2.h new file mode 100644 index 000000000..f9a0b666f --- /dev/null +++ b/src/block/threefish_avx2/threefish_avx2.h @@ -0,0 +1,26 @@ +/* +* Threefish-512 in AVX2 +* (C) 2013 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_THREEFISH_AVX2_H__ +#define BOTAN_THREEFISH_AVX2_H__ + +#include <botan/threefish.h> + +namespace Botan { + +/** +* Threefish-512 +*/ +class BOTAN_DLL Threefish_512_AVX2 : public Threefish_512 + { + private: + void full_inplace_update(byte* buf, size_t sz) override; + }; + +} + +#endif |