aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlloyd <[email protected]>2010-07-27 13:35:33 +0000
committerlloyd <[email protected]>2010-07-27 13:35:33 +0000
commite6331ea61e1ccb3eb92ea8707a49be7cebe9ea98 (patch)
treebf421c73a5e3ce23f749a1c95cd6586d58f628b5
parentb46aab7e0eeb16f116fdea19727a22bf60a45748 (diff)
parentf197c68667c721605f5e3942a09e93bb140cac47 (diff)
merge of '068683e77f11701262d6ff5f4004629734c28cd9'
and 'ada4c9893d70affd8934ab9664e390087feab3c9'
-rw-r--r--doc/examples/cpuid.cpp11
-rw-r--r--doc/log.txt3
-rw-r--r--src/engine/aes_isa_eng/aes_isa_engine.cpp2
-rw-r--r--src/libstate/libstate.cpp3
-rw-r--r--src/utils/bswap.h33
-rw-r--r--src/utils/cpuid.cpp47
-rw-r--r--src/utils/cpuid.h47
7 files changed, 84 insertions, 62 deletions
diff --git a/doc/examples/cpuid.cpp b/doc/examples/cpuid.cpp
index f4d441ba2..3693257b8 100644
--- a/doc/examples/cpuid.cpp
+++ b/doc/examples/cpuid.cpp
@@ -13,16 +13,15 @@ namespace {
void print_if_feature(const std::string& feature_name, bool exists)
{
- if(exists)
- std::cout << feature_name << '\n';
- else
- std::cout << '[' << feature_name << ']' << '\n';
+ std::cout << (exists ? '+' : '-') << " " << feature_name << "\n";
}
}
int main()
{
+ CPUID::initialize();
+
std::cout << "Cache line size = " << CPUID::cache_line_size() << "\n";
print_if_feature("RDTSC", CPUID::has_rdtsc());
@@ -32,7 +31,9 @@ int main()
print_if_feature("SSE4.2", CPUID::has_sse42());
print_if_feature("AVX", CPUID::has_avx());
- print_if_feature("AES-NI", CPUID::has_aes_intel());
+ print_if_feature("MOVBE", CPUID::has_movbe());
+ print_if_feature("PCMUL", CPUID::has_pcmuludq());
+ print_if_feature("AES-NI", CPUID::has_aes_ni());
print_if_feature("AltiVec", CPUID::has_altivec());
}
diff --git a/doc/log.txt b/doc/log.txt
index 5487bb4fd..0313eb7db 100644
--- a/doc/log.txt
+++ b/doc/log.txt
@@ -1,5 +1,6 @@
* 1.9.10-dev, ????-??-??
+ - Use GCC byteswap intrinsics where possible
- Drop support for building with Python 2.4
- Fix benchmarking of block ciphers in ECB mode
- Consolidate the two x86 assembly engines
@@ -36,7 +37,7 @@
- Skip building shared libraries on MinGW/Cygwin
- Fix creation of 512 and 768 bit DL groups using the DSA kosherizer
- Fix compilation on GCC versions before 4.3 (missing cpuid.h)
- - Fix complilation under the Clang compiler
+ - Fix compilation under the Clang compiler
* 1.9.7, 2010-04-27
- TLS: Support reading SSLv2 client hellos
diff --git a/src/engine/aes_isa_eng/aes_isa_engine.cpp b/src/engine/aes_isa_eng/aes_isa_engine.cpp
index 682dfe6b1..7f541d583 100644
--- a/src/engine/aes_isa_eng/aes_isa_engine.cpp
+++ b/src/engine/aes_isa_eng/aes_isa_engine.cpp
@@ -19,7 +19,7 @@ AES_ISA_Engine::find_block_cipher(const SCAN_Name& request,
Algorithm_Factory&) const
{
#if defined(BOTAN_HAS_AES_INTEL)
- if(CPUID::has_aes_intel())
+ if(CPUID::has_aes_ni())
{
if(request.algo_name() == "AES-128")
return new AES_128_Intel;
diff --git a/src/libstate/libstate.cpp b/src/libstate/libstate.cpp
index 83fb31406..1db9ca44c 100644
--- a/src/libstate/libstate.cpp
+++ b/src/libstate/libstate.cpp
@@ -8,6 +8,7 @@
#include <botan/libstate.h>
#include <botan/charset.h>
#include <botan/engine.h>
+#include <botan/cpuid.h>
#include <botan/internal/defalloc.h>
#include <botan/internal/default_engine.h>
#include <botan/internal/mutex.h>
@@ -241,6 +242,8 @@ RandomNumberGenerator& Library_State::global_rng()
*/
void Library_State::initialize(bool thread_safe)
{
+ CPUID::initialize();
+
if(mutex_factory)
throw Invalid_State("Library_State has already been initialized");
diff --git a/src/utils/bswap.h b/src/utils/bswap.h
index 49962f076..b35dbf123 100644
--- a/src/utils/bswap.h
+++ b/src/utils/bswap.h
@@ -18,37 +18,56 @@
namespace Botan {
-/*
-* Byte Swapping Functions
+/**
+* Swap a 16 bit integer
*/
inline u16bit reverse_bytes(u16bit input)
{
return rotate_left(input, 8);
}
+/**
+* Swap a 32 bit integer
+*/
inline u32bit reverse_bytes(u32bit input)
{
-#if BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+#if BOTAN_GCC_VERSION >= 430
+
+ // GCC intrinsic added in 4.3, works for a number of CPUs
+ return __builtin_bswap32(input);
+
+#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
// GCC-style inline assembly for x86 or x86-64
asm("bswapl %0" : "=r" (input) : "0" (input));
return input;
#elif defined(_MSC_VER) && defined(BOTAN_TARGET_ARCH_IS_IA32)
+
// Visual C++ inline asm for 32-bit x86, by Yves Jerschow
__asm mov eax, input;
__asm bswap eax;
#else
+
// Generic implementation
return (rotate_right(input, 8) & 0xFF00FF00) |
(rotate_left (input, 8) & 0x00FF00FF);
+
#endif
}
+/**
+* Swap a 64 bit integer
+*/
inline u64bit reverse_bytes(u64bit input)
{
-#if BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_ARCH_IS_AMD64)
+#if BOTAN_GCC_VERSION >= 430
+
+ // GCC intrinsic added in 4.3, works for a number of CPUs
+ return __builtin_bswap64(input);
+
+#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_ARCH_IS_AMD64)
// GCC-style inline assembly for x86-64
asm("bswapq %0" : "=r" (input) : "0" (input));
return input;
@@ -69,6 +88,9 @@ inline u64bit reverse_bytes(u64bit input)
#endif
}
+/**
+* Swap 4 Ts in an array
+*/
template<typename T>
inline void bswap_4(T x[4])
{
@@ -80,6 +102,9 @@ inline void bswap_4(T x[4])
#if defined(BOTAN_TARGET_CPU_HAS_SSE2)
+/**
+* Swap 4 u32bits in an array using SSE2 shuffle instructions
+*/
template<>
inline void bswap_4(u32bit x[4])
{
diff --git a/src/utils/cpuid.cpp b/src/utils/cpuid.cpp
index 4837e7ac4..b76210865 100644
--- a/src/utils/cpuid.cpp
+++ b/src/utils/cpuid.cpp
@@ -1,6 +1,6 @@
/*
* Runtime CPU detection
-* (C) 2009 Jack Lloyd
+* (C) 2009-2010 Jack Lloyd
*
* Distributed under the terms of the Botan license
*/
@@ -47,6 +47,10 @@
namespace Botan {
+u64bit CPUID::x86_processor_flags = 0;
+u32bit CPUID::cache_line = 32;
+bool CPUID::altivec_capable = false;
+
namespace {
u32bit get_x86_cache_line_size()
@@ -140,54 +144,21 @@ bool altivec_check_pvr_emul()
}
-/*
-* Call the x86 CPUID instruction and return the contents of ecx and
-* edx, which contain the feature masks.
-*/
-u64bit CPUID::x86_processor_flags()
+void CPUID::initialize()
{
- static u64bit proc_flags = 0;
-
- if(proc_flags)
- return proc_flags;
-
u32bit cpuid[4] = { 0 };
CALL_CPUID(1, cpuid);
- // Set the FPU bit on to force caching in proc_flags
- proc_flags = ((u64bit)cpuid[2] << 32) | cpuid[3] | 1;
-
- return proc_flags;
- }
-
-u32bit CPUID::cache_line_size()
- {
- static u32bit cl_size = 0;
-
- if(cl_size)
- return cl_size;
+ x86_processor_flags = ((u64bit)cpuid[2] << 32) | cpuid[3];
- cl_size = get_x86_cache_line_size();
+ cache_line = get_x86_cache_line_size();
- return cl_size;
- }
+ altivec_capable = false;
-bool CPUID::has_altivec()
- {
- static bool first_time = true;
- static bool altivec_capable = false;
-
- if(first_time)
- {
#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
if(altivec_check_sysctl() || altivec_check_pvr_emul())
altivec_capable = true;
#endif
-
- first_time = false;
- }
-
- return altivec_capable;
}
}
diff --git a/src/utils/cpuid.h b/src/utils/cpuid.h
index a6a278a6e..6cb4092bb 100644
--- a/src/utils/cpuid.h
+++ b/src/utils/cpuid.h
@@ -1,6 +1,6 @@
/*
* Runtime CPU detection
-* (C) 2009 Jack Lloyd
+* (C) 2009-2010 Jack Lloyd
*
* Distributed under the terms of the Botan license
*/
@@ -19,9 +19,14 @@ class BOTAN_DLL CPUID
{
public:
/**
+ * Probe the CPU and see what extensions are supported
+ */
+ static void initialize();
+
+ /**
* Return a best guess of the cache line size
*/
- static u32bit cache_line_size();
+ static u32bit cache_line_size() { return cache_line; }
/**
* Check if the processor supports RDTSC
@@ -60,32 +65,48 @@ class BOTAN_DLL CPUID
{ return x86_processor_flags_has(CPUID_AVX_BIT); }
/**
- * Check if the processor supports Intel's AES instructions
+ * Check if the processor supports AES-NI
+ */
+ static bool has_aes_ni()
+ { return x86_processor_flags_has(CPUID_AESNI_BIT); }
+
+ /**
+ * Check if the processor supports PCLMULQDQ
+ */
+ static bool has_pcmuludq()
+ { return x86_processor_flags_has(CPUID_PCMUL_BIT); }
+
+ /**
+ * Check if the processor supports MOVBE
*/
- static bool has_aes_intel()
- { return x86_processor_flags_has(CPUID_INTEL_AES_BIT); }
+ static bool has_movbe()
+ { return x86_processor_flags_has(CPUID_MOVBE_BIT); }
/**
* Check if the processor supports AltiVec/VMX
*/
- static bool has_altivec();
+ static bool has_altivec() { return altivec_capable; }
private:
- static bool x86_processor_flags_has(u64bit bit)
- {
- return ((x86_processor_flags() >> bit) & 1);
- }
-
enum CPUID_bits {
CPUID_RDTSC_BIT = 4,
CPUID_SSE2_BIT = 26,
+ CPUID_PCMUL_BIT = 33,
CPUID_SSSE3_BIT = 41,
CPUID_SSE41_BIT = 51,
CPUID_SSE42_BIT = 52,
- CPUID_INTEL_AES_BIT = 57,
+ CPUID_MOVBE_BIT = 54,
+ CPUID_AESNI_BIT = 57,
CPUID_AVX_BIT = 60
};
- static u64bit x86_processor_flags();
+ static bool x86_processor_flags_has(u64bit bit)
+ {
+ return ((x86_processor_flags >> bit) & 1);
+ }
+
+ static u64bit x86_processor_flags;
+ static u32bit cache_line;
+ static bool altivec_capable;
};
}