aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2016-10-14 12:03:22 -0400
committerJack Lloyd <[email protected]>2016-10-14 12:03:22 -0400
commit8b3bda479efecef760f052cc055d3d6d98bf0637 (patch)
tree77cd13fe2e62085c2f04bf437c1e845a20bf6a01
parent60fc0ebf44e746b17d46a9d53054064808476495 (diff)
Add ISA annotations to functions using SIMD, AES, etc
Also emit `#pragma GCC target` in the ISA specific amalgamation files. This allows compiling without any special compiler flags, at least with GCC 6.2 and Clang 3.8. The ISA annotations are ignored in MSVC, which just emits whatever instruction the intrinsic requires.
-rwxr-xr-xconfigure.py10
-rw-r--r--src/build-data/buildh.in9
-rw-r--r--src/lib/block/aes_ni/aes_ni.cpp12
-rw-r--r--src/lib/block/aes_ssse3/aes_ssse3.cpp18
-rw-r--r--src/lib/block/idea_sse2/idea_sse2.cpp4
-rw-r--r--src/lib/block/threefish_avx2/threefish_avx2.cpp5
-rw-r--r--src/lib/entropy/rdseed/rdseed.cpp1
-rw-r--r--src/lib/hash/sha1_sse2/sha1_sse2.cpp1
-rw-r--r--src/lib/modes/aead/gcm/clmul/clmul.cpp1
-rw-r--r--src/lib/rng/rdrand_rng/rdrand_rng.cpp1
-rw-r--r--src/lib/stream/chacha/chacha_sse2/chacha_sse2.cpp1
11 files changed, 62 insertions, 1 deletions
diff --git a/configure.py b/configure.py
index 037d47d54..d23716256 100755
--- a/configure.py
+++ b/configure.py
@@ -1783,6 +1783,16 @@ def generate_amalgamation(build_config, options):
if tgt not in botan_amalg_files:
botan_amalg_files[tgt] = open_amalg_file(tgt)
+
+ if tgt != '':
+ for isa in mod.need_isa:
+ if isa == 'aesni':
+ isa = "aes,ssse3,pclmul"
+ elif isa == 'rdrand':
+ isa = 'rdrnd'
+
+ botan_amalg_files[tgt].write('#if defined(__GNUG__)\n#pragma GCC target ("%s")\n#endif\n' % (isa))
+
if tgt not in headers_written:
headers_written[tgt] = headers_written_in_h_files.copy()
diff --git a/src/build-data/buildh.in b/src/build-data/buildh.in
index e943973a0..c84698804 100644
--- a/src/build-data/buildh.in
+++ b/src/build-data/buildh.in
@@ -199,8 +199,15 @@ Each read generates 32 bits of output
%{target_compiler_defines}
+
+#if defined(__GNUG__) || defined(__clang__)
+ #define BOTAN_FUNC_ISA(isa) __attribute__ ((target(isa)))
+#else
+ #define BOTAN_FUNC_ISA(isa)
+#endif
+
/*
-* Compile-time deprecatation warnings
+* Compile-time deprecation warnings
*/
#if !defined(BOTAN_NO_DEPRECATED_WARNINGS)
diff --git a/src/lib/block/aes_ni/aes_ni.cpp b/src/lib/block/aes_ni/aes_ni.cpp
index 3377f9d61..7518a6cf2 100644
--- a/src/lib/block/aes_ni/aes_ni.cpp
+++ b/src/lib/block/aes_ni/aes_ni.cpp
@@ -13,6 +13,7 @@ namespace Botan {
namespace {
+BOTAN_FUNC_ISA("ssse3")
__m128i aes_128_key_expansion(__m128i key, __m128i key_with_rcon)
{
key_with_rcon = _mm_shuffle_epi32(key_with_rcon, _MM_SHUFFLE(3,3,3,3));
@@ -22,6 +23,7 @@ __m128i aes_128_key_expansion(__m128i key, __m128i key_with_rcon)
return _mm_xor_si128(key, key_with_rcon);
}
+BOTAN_FUNC_ISA("ssse3")
void aes_192_key_expansion(__m128i* K1, __m128i* K2, __m128i key2_with_rcon,
u32bit out[], bool last)
{
@@ -51,6 +53,7 @@ void aes_192_key_expansion(__m128i* K1, __m128i* K2, __m128i key2_with_rcon,
/*
* The second half of the AES-256 key expansion (other half same as AES-128)
*/
+BOTAN_FUNC_ISA("ssse3,aes")
__m128i aes_256_key_expansion(__m128i key, __m128i key2)
{
__m128i key_with_rcon = _mm_aeskeygenassist_si128(key2, 0x00);
@@ -103,6 +106,7 @@ __m128i aes_256_key_expansion(__m128i key, __m128i key2)
/*
* AES-128 Encryption
*/
+BOTAN_FUNC_ISA("ssse3,aes")
void AES_128::aesni_encrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -179,6 +183,7 @@ void AES_128::aesni_encrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-128 Decryption
*/
+BOTAN_FUNC_ISA("ssse3,aes")
void AES_128::aesni_decrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -255,6 +260,7 @@ void AES_128::aesni_decrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-128 Key Schedule
*/
+BOTAN_FUNC_ISA("ssse3,aes")
void AES_128::aesni_key_schedule(const byte key[], size_t)
{
m_EK.resize(44);
@@ -307,6 +313,7 @@ void AES_128::aesni_key_schedule(const byte key[], size_t)
/*
* AES-192 Encryption
*/
+BOTAN_FUNC_ISA("ssse3,aes")
void AES_192::aesni_encrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -389,6 +396,7 @@ void AES_192::aesni_encrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-192 Decryption
*/
+BOTAN_FUNC_ISA("ssse3,aes")
void AES_192::aesni_decrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -471,6 +479,7 @@ void AES_192::aesni_decrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-192 Key Schedule
*/
+BOTAN_FUNC_ISA("ssse3,aes")
void AES_192::aesni_key_schedule(const byte key[], size_t)
{
m_EK.resize(52);
@@ -520,6 +529,7 @@ void AES_192::aesni_key_schedule(const byte key[], size_t)
/*
* AES-256 Encryption
*/
+BOTAN_FUNC_ISA("ssse3,aes")
void AES_256::aesni_encrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -608,6 +618,7 @@ void AES_256::aesni_encrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-256 Decryption
*/
+BOTAN_FUNC_ISA("ssse3,aes")
void AES_256::aesni_decrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -696,6 +707,7 @@ void AES_256::aesni_decrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-256 Key Schedule
*/
+BOTAN_FUNC_ISA("ssse3,aes")
void AES_256::aesni_key_schedule(const byte key[], size_t)
{
m_EK.resize(60);
diff --git a/src/lib/block/aes_ssse3/aes_ssse3.cpp b/src/lib/block/aes_ssse3/aes_ssse3.cpp
index eda39a7cc..ef24795bb 100644
--- a/src/lib/block/aes_ssse3/aes_ssse3.cpp
+++ b/src/lib/block/aes_ssse3/aes_ssse3.cpp
@@ -51,6 +51,7 @@ const __m128i sr[4] = {
#define mm_xor3(x, y, z) _mm_xor_si128(x, _mm_xor_si128(y, z))
+BOTAN_FUNC_ISA("ssse3")
__m128i aes_schedule_transform(__m128i input,
__m128i table_1,
__m128i table_2)
@@ -63,6 +64,7 @@ __m128i aes_schedule_transform(__m128i input,
_mm_shuffle_epi8(table_2, i_2));
}
+BOTAN_FUNC_ISA("ssse3")
__m128i aes_schedule_mangle(__m128i k, byte round_no)
{
__m128i t = _mm_shuffle_epi8(_mm_xor_si128(k, _mm_set1_epi8(0x5B)),
@@ -77,6 +79,7 @@ __m128i aes_schedule_mangle(__m128i k, byte round_no)
return _mm_shuffle_epi8(t2, sr[round_no % 4]);
}
+BOTAN_FUNC_ISA("ssse3")
__m128i aes_schedule_192_smear(__m128i x, __m128i y)
{
return mm_xor3(y,
@@ -84,6 +87,7 @@ __m128i aes_schedule_192_smear(__m128i x, __m128i y)
_mm_shuffle_epi32(y, 0x80));
}
+BOTAN_FUNC_ISA("ssse3")
__m128i aes_schedule_mangle_dec(__m128i k, byte round_no)
{
const __m128i dsk[8] = {
@@ -112,6 +116,7 @@ __m128i aes_schedule_mangle_dec(__m128i k, byte round_no)
return _mm_shuffle_epi8(output, sr[round_no % 4]);
}
+BOTAN_FUNC_ISA("ssse3")
__m128i aes_schedule_mangle_last(__m128i k, byte round_no)
{
const __m128i out_tr1 = _mm_set_epi32(
@@ -124,6 +129,7 @@ __m128i aes_schedule_mangle_last(__m128i k, byte round_no)
return aes_schedule_transform(k, out_tr1, out_tr2);
}
+BOTAN_FUNC_ISA("ssse3")
__m128i aes_schedule_mangle_last_dec(__m128i k)
{
const __m128i deskew1 = _mm_set_epi32(
@@ -135,6 +141,7 @@ __m128i aes_schedule_mangle_last_dec(__m128i k)
return aes_schedule_transform(k, deskew1, deskew2);
}
+BOTAN_FUNC_ISA("ssse3")
__m128i aes_schedule_round(__m128i* rcon, __m128i input1, __m128i input2)
{
if(rcon)
@@ -170,6 +177,7 @@ __m128i aes_schedule_round(__m128i* rcon, __m128i input1, __m128i input2)
smeared);
}
+BOTAN_FUNC_ISA("ssse3")
__m128i aes_ssse3_encrypt(__m128i B, const __m128i* keys, size_t rounds)
{
const __m128i sb2u = _mm_set_epi32(
@@ -239,6 +247,7 @@ __m128i aes_ssse3_encrypt(__m128i B, const __m128i* keys, size_t rounds)
}
}
+BOTAN_FUNC_ISA("ssse3")
__m128i aes_ssse3_decrypt(__m128i B, const __m128i* keys, size_t rounds)
{
const __m128i k_dipt1 = _mm_set_epi32(
@@ -336,6 +345,7 @@ __m128i aes_ssse3_decrypt(__m128i B, const __m128i* keys, size_t rounds)
/*
* AES-128 Encryption
*/
+BOTAN_FUNC_ISA("ssse3")
void AES_128::ssse3_encrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -358,6 +368,7 @@ void AES_128::ssse3_encrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-128 Decryption
*/
+BOTAN_FUNC_ISA("ssse3")
void AES_128::ssse3_decrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -380,6 +391,7 @@ void AES_128::ssse3_decrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-128 Key Schedule
*/
+BOTAN_FUNC_ISA("ssse3")
void AES_128::ssse3_key_schedule(const byte keyb[], size_t)
{
__m128i rcon = _mm_set_epi32(0x702A9808, 0x4D7C7D81,
@@ -418,6 +430,7 @@ void AES_128::ssse3_key_schedule(const byte keyb[], size_t)
/*
* AES-192 Encryption
*/
+BOTAN_FUNC_ISA("ssse3")
void AES_192::ssse3_encrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -440,6 +453,7 @@ void AES_192::ssse3_encrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-192 Decryption
*/
+BOTAN_FUNC_ISA("ssse3")
void AES_192::ssse3_decrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -462,6 +476,7 @@ void AES_192::ssse3_decrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-192 Key Schedule
*/
+BOTAN_FUNC_ISA("ssse3")
void AES_192::ssse3_key_schedule(const byte keyb[], size_t)
{
__m128i rcon = _mm_set_epi32(0x702A9808, 0x4D7C7D81,
@@ -529,6 +544,7 @@ void AES_192::ssse3_key_schedule(const byte keyb[], size_t)
/*
* AES-256 Encryption
*/
+BOTAN_FUNC_ISA("ssse3")
void AES_256::ssse3_encrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -551,6 +567,7 @@ void AES_256::ssse3_encrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-256 Decryption
*/
+BOTAN_FUNC_ISA("ssse3")
void AES_256::ssse3_decrypt_n(const byte in[], byte out[], size_t blocks) const
{
const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
@@ -573,6 +590,7 @@ void AES_256::ssse3_decrypt_n(const byte in[], byte out[], size_t blocks) const
/*
* AES-256 Key Schedule
*/
+BOTAN_FUNC_ISA("ssse3")
void AES_256::ssse3_key_schedule(const byte keyb[], size_t)
{
__m128i rcon = _mm_set_epi32(0x702A9808, 0x4D7C7D81,
diff --git a/src/lib/block/idea_sse2/idea_sse2.cpp b/src/lib/block/idea_sse2/idea_sse2.cpp
index 4debfc95a..1e63a8332 100644
--- a/src/lib/block/idea_sse2/idea_sse2.cpp
+++ b/src/lib/block/idea_sse2/idea_sse2.cpp
@@ -13,6 +13,7 @@ namespace Botan {
namespace {
+BOTAN_FUNC_ISA("sse2")
inline __m128i mul(__m128i X, u16bit K_16)
{
const __m128i zeros = _mm_set1_epi16(0);
@@ -61,6 +62,7 @@ inline __m128i mul(__m128i X, u16bit K_16)
* that extra unpack could easily save 3-4 cycles per block, and would
* also help a lot with register pressure on 32-bit x86
*/
+BOTAN_FUNC_ISA("sse2")
void transpose_in(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3)
{
__m128i T0 = _mm_unpackhi_epi32(B0, B1);
@@ -97,6 +99,7 @@ void transpose_in(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3)
/*
* 4x8 matrix transpose (reverse)
*/
+BOTAN_FUNC_ISA("sse2")
void transpose_out(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3)
{
__m128i T0 = _mm_unpacklo_epi64(B0, B1);
@@ -130,6 +133,7 @@ void transpose_out(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3)
/*
* 8 wide IDEA encryption/decryption in SSE2
*/
+BOTAN_FUNC_ISA("sse2")
void IDEA::sse2_idea_op_8(const byte in[64], byte out[64], const u16bit EK[52]) const
{
CT::poison(in, 64);
diff --git a/src/lib/block/threefish_avx2/threefish_avx2.cpp b/src/lib/block/threefish_avx2/threefish_avx2.cpp
index e0321812a..e4a46e3de 100644
--- a/src/lib/block/threefish_avx2/threefish_avx2.cpp
+++ b/src/lib/block/threefish_avx2/threefish_avx2.cpp
@@ -12,6 +12,7 @@ namespace Botan {
namespace {
+BOTAN_FUNC_ISA("avx2")
inline void interleave_epi64(__m256i& X0, __m256i& X1)
{
// interleave X0 and X1 qwords
@@ -24,6 +25,7 @@ inline void interleave_epi64(__m256i& X0, __m256i& X1)
X1 = _mm256_permute4x64_epi64(T1, _MM_SHUFFLE(3,1,2,0));
}
+BOTAN_FUNC_ISA("avx2")
inline void deinterleave_epi64(__m256i& X0, __m256i& X1)
{
const __m256i T0 = _mm256_permute4x64_epi64(X0, _MM_SHUFFLE(3,1,2,0));
@@ -33,6 +35,7 @@ inline void deinterleave_epi64(__m256i& X0, __m256i& X1)
X1 = _mm256_unpackhi_epi64(T0, T1);
}
+BOTAN_FUNC_ISA("avx2")
inline void rotate_keys(__m256i& R0, __m256i& R1, __m256i R2)
{
/*
@@ -71,6 +74,7 @@ inline void rotate_keys(__m256i& R0, __m256i& R1, __m256i R2)
}
+BOTAN_FUNC_ISA("avx2")
void Threefish_512::avx2_encrypt_n(const byte in[], byte out[], size_t blocks) const
{
const u64bit* K = &get_K()[0];
@@ -245,6 +249,7 @@ void Threefish_512::avx2_encrypt_n(const byte in[], byte out[], size_t blocks) c
#undef THREEFISH_INJECT_KEY_2
}
+BOTAN_FUNC_ISA("avx2")
void Threefish_512::avx2_decrypt_n(const byte in[], byte out[], size_t blocks) const
{
const u64bit* K = &get_K()[0];
diff --git a/src/lib/entropy/rdseed/rdseed.cpp b/src/lib/entropy/rdseed/rdseed.cpp
index 325edfd41..01cab3a7e 100644
--- a/src/lib/entropy/rdseed/rdseed.cpp
+++ b/src/lib/entropy/rdseed/rdseed.cpp
@@ -15,6 +15,7 @@
namespace Botan {
+BOTAN_FUNC_ISA("rdseed")
size_t Intel_Rdseed::poll(RandomNumberGenerator& rng) {
if(CPUID::has_rdseed())
{
diff --git a/src/lib/hash/sha1_sse2/sha1_sse2.cpp b/src/lib/hash/sha1_sse2/sha1_sse2.cpp
index 2ece541b0..0f88bb4c2 100644
--- a/src/lib/hash/sha1_sse2/sha1_sse2.cpp
+++ b/src/lib/hash/sha1_sse2/sha1_sse2.cpp
@@ -152,6 +152,7 @@ inline void F4(u32bit A, u32bit& B, u32bit C, u32bit D, u32bit& E, u32bit msg)
* SHA-160 Compression Function using SSE for message expansion
*/
//static
+BOTAN_FUNC_ISA("sse2")
void SHA_160::sse2_compress_n(secure_vector<uint32_t>& digest, const byte input[], size_t blocks)
{
using namespace SHA1_SSE2_F;
diff --git a/src/lib/modes/aead/gcm/clmul/clmul.cpp b/src/lib/modes/aead/gcm/clmul/clmul.cpp
index 6e1db7012..725ef3da3 100644
--- a/src/lib/modes/aead/gcm/clmul/clmul.cpp
+++ b/src/lib/modes/aead/gcm/clmul/clmul.cpp
@@ -11,6 +11,7 @@
namespace Botan {
+BOTAN_FUNC_ISA("pclmul,ssse3")
void gcm_multiply_clmul(byte x[16], const byte H[16])
{
/*
diff --git a/src/lib/rng/rdrand_rng/rdrand_rng.cpp b/src/lib/rng/rdrand_rng/rdrand_rng.cpp
index 4d2e51cf8..1ee857c6c 100644
--- a/src/lib/rng/rdrand_rng/rdrand_rng.cpp
+++ b/src/lib/rng/rdrand_rng/rdrand_rng.cpp
@@ -36,6 +36,7 @@ uint32_t RDRAND_RNG::rdrand()
}
//static
+BOTAN_FUNC_ISA("rdrnd")
uint32_t RDRAND_RNG::rdrand_status(bool& ok)
{
ok = false;
diff --git a/src/lib/stream/chacha/chacha_sse2/chacha_sse2.cpp b/src/lib/stream/chacha/chacha_sse2/chacha_sse2.cpp
index e39b285b3..a7261a522 100644
--- a/src/lib/stream/chacha/chacha_sse2/chacha_sse2.cpp
+++ b/src/lib/stream/chacha/chacha_sse2/chacha_sse2.cpp
@@ -11,6 +11,7 @@
namespace Botan {
//static
+BOTAN_FUNC_ISA("sse2")
void ChaCha::chacha_sse2_x4(byte output[64], u32bit input[16], size_t rounds)
{
BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");