diff options
32 files changed, 482 insertions, 120 deletions
diff --git a/configure.py b/configure.py index 93192abdd..5e2e4004d 100755 --- a/configure.py +++ b/configure.py @@ -37,9 +37,9 @@ class BuildConfigurationInformation(object): """ version_major = 1 version_minor = 9 - version_patch = 2 - version_so_patch = 2 - version_suffix = '' + version_patch = 3 + version_so_patch = 3 + version_suffix = '-dev' version_string = '%d.%d.%d%s' % ( version_major, version_minor, version_patch, version_suffix) @@ -78,7 +78,8 @@ class BuildConfigurationInformation(object): if file.endswith('.cpp')]) self.python_sources = sorted( - [os.path.join(self.python_dir, file) for file in os.listdir(self.python_dir) + [os.path.join(self.python_dir, file) + for file in os.listdir(self.python_dir) if file.endswith('.cpp')]) def doc_files(self): @@ -142,6 +143,11 @@ def process_command_line(args): dest='unaligned_mem', action='store_false', help=SUPPRESS_HELP) + target_group.add_option('--with-isa-extension', metavar='ISALIST', + dest='with_isa_extns', + action='append', default=[], + help='enable ISA extensions (sse2, altivec, ...)') + build_group = OptionGroup(parser, 'Build options') build_group.add_option('--enable-shared', dest='build_shared_lib', @@ -270,11 +276,13 @@ def process_command_line(args): raise Exception('Bad value to --with-endian "%s"' % ( options.with_endian)) - def parse_module_opts(modules): + def parse_multiple_enable(modules): return sorted(set(sum([s.split(',') for s in modules], []))) - options.enabled_modules = parse_module_opts(options.enabled_modules) - options.disabled_modules = parse_module_opts(options.disabled_modules) + options.enabled_modules = parse_multiple_enable(options.enabled_modules) + options.disabled_modules = parse_multiple_enable(options.disabled_modules) + + options.with_isa_extns = parse_multiple_enable(options.with_isa_extns) return options @@ -361,6 +369,7 @@ class ModuleInfo(object): 'define': None, 'modset': None, 'uses_tr1': 'false', + 'need_isa': None, 'note': '', 'mp_bits': 0 }) @@ -395,8 +404,21 @@ class ModuleInfo(object): else: self.uses_tr1 = False - def compatible_cpu(self, arch, cpu): - return self.arch == [] or (arch in self.arch or cpu in self.arch) + def compatible_cpu(self, archinfo, options): + + arch_name = archinfo.basename + cpu_name = options.cpu + + if self.arch != []: + if arch_name not in self.arch and cpu_name not in self.arch: + return False + + if self.need_isa != None: + cpu_isa = archinfo.isa_extensions_in(cpu_name) + if self.need_isa not in cpu_isa: + return self.need_isa in options.with_isa_extns + + return True def compatible_os(self, os): return self.os == [] or os in self.os @@ -423,12 +445,21 @@ class ModuleInfo(object): class ArchInfo(object): def __init__(self, infofile): lex_me_harder(infofile, self, - ['aliases', 'submodels', 'submodel_aliases'], - { 'default_submodel': None, - 'endian': None, + ['aliases', 'submodels', 'submodel_aliases', 'isa_extn'], + { 'endian': None, 'unaligned': 'no' }) + def convert_isa_list(input): + isa_info = {} + for line in self.isa_extn: + (isa,cpus) = line.split(':') + for cpu in cpus.split(','): + isa_info.setdefault(cpu, []).append(isa) + return isa_info + + self.isa_extn = convert_isa_list(self.isa_extn) + self.submodel_aliases = force_to_dict(self.submodel_aliases) if self.unaligned == 'ok': @@ -436,35 +467,53 @@ class ArchInfo(object): else: self.unaligned_ok = 0 + """ + Return ISA extensions specific to this CPU + """ + def isa_extensions_in(self, cpu_type): + return sorted(self.isa_extn.get(cpu_type, []) + + self.isa_extn.get('all', [])) + + """ + Return a list of all submodels for this arch + """ def all_submodels(self): return sorted(zip(self.submodels, self.submodels) + self.submodel_aliases.items(), key = lambda k: len(k[0]), reverse = True) - def defines(self, target_submodel, with_endian, unaligned_ok): + """ + Return CPU-specific defines for build.h + """ + def defines(self, options): macros = ['TARGET_ARCH_IS_%s' % (self.basename.upper())] def form_cpu_macro(cpu_name): return cpu_name.upper().replace('.', '').replace('-', '_') - if self.basename != target_submodel: - macros.append('TARGET_CPU_IS_%s' % ( - form_cpu_macro(target_submodel))) + if self.basename != options.cpu: + macros.append('TARGET_CPU_IS_%s' % (form_cpu_macro(options.cpu))) + + isa_extensions = sorted(set( + sum([self.isa_extensions_in(options.cpu), + options.with_isa_extns], + []))) - if with_endian: - macros.append('TARGET_CPU_IS_%s_ENDIAN' % (with_endian.upper())) - elif self.endian != None: - macros.append('TARGET_CPU_IS_%s_ENDIAN' % (self.endian.upper())) + for simd in isa_extensions: + macros.append('TARGET_CPU_HAS_%s' % (simd.upper())) + endian = options.with_endian or self.endian + + if endian != None: + macros.append('TARGET_CPU_IS_%s_ENDIAN' % (endian.upper())) + + unaligned_ok = options.unaligned_mem if unaligned_ok is None: unaligned_ok = self.unaligned_ok + if unaligned_ok: + logging.info('Assuming unaligned memory access works') - if unaligned_ok: - logging.info('Assuming unaligned memory access works on this CPU') - macros.append('TARGET_UNALIGNED_LOADSTOR_OK %d' % (unaligned_ok)) - - if self.basename == 'amd64': - macros.append('TARGET_CPU_HAS_SSE2') + macros.append('TARGET_UNALIGNED_MEMORY_ACCESS_OK %d' % (unaligned_ok)) return macros @@ -514,7 +563,9 @@ class CompilerInfo(object): del self.mach_opt - + """ + Return the machine specific ABI flags + """ def mach_abi_link_flags(self, osname, arch, submodel): abi_link = set() @@ -526,6 +577,9 @@ class CompilerInfo(object): return '' return ' ' + ' '.join(abi_link) + """ + Return the flags for MACH_OPT + """ def mach_opts(self, arch, submodel): def submodel_fixup(tup): @@ -541,6 +595,9 @@ class CompilerInfo(object): return '' + """ + Return the flags for LIB_OPT + """ def library_opt_flags(self, debug_build): flags = self.lib_opt_flags if debug_build and self.debug_flags != '': @@ -549,11 +606,17 @@ class CompilerInfo(object): flags += ' ' + self.no_debug_flags return flags + """ + Return the command needed to link a shared object + """ def so_link_command_for(self, osname): if osname in self.so_link_flags: return self.so_link_flags[osname] return self.so_link_flags['default'] + """ + Return defines for build.h + """ def defines(self, with_tr1): def tr1_macro(): @@ -783,9 +846,7 @@ def create_template_vars(build_config, options, modules, cc, arch, osinfo): 'target_compiler_defines': make_cpp_macros( cc.defines(options.with_tr1)), - 'target_cpu_defines': make_cpp_macros( - arch.defines(options.cpu, options.with_endian, - options.unaligned_mem)), + 'target_cpu_defines': make_cpp_macros(arch.defines(options)), 'include_files': makefile_list(build_config.headers), @@ -840,7 +901,7 @@ def create_template_vars(build_config, options, modules, cc, arch, osinfo): """ Determine which modules to load based on options, target, etc """ -def choose_modules_to_use(options, modules): +def choose_modules_to_use(modules, archinfo, options): to_load = [] maybe_dep = [] @@ -855,7 +916,7 @@ def choose_modules_to_use(options, modules): elif modname in options.enabled_modules: to_load.append(modname) # trust the user - elif not module.compatible_cpu(options.arch, options.cpu): + elif not module.compatible_cpu(archinfo, options): cannot_use_because(modname, 'CPU incompatible') elif not module.compatible_os(options.os): cannot_use_because(modname, 'OS incompatible') @@ -1047,8 +1108,8 @@ def setup_build(build_config, options, template_vars): os.path.join(build_config.build_dir, sink) if options.boost_python: - templates_to_proc[ - os.path.join(options.makefile_dir, 'python.in')] = 'Makefile.python' + template = os.path.join(options.makefile_dir, 'python.in') + templates_to_proc[template] = 'Makefile.python' for (template, sink) in templates_to_proc.items(): try: @@ -1123,9 +1184,10 @@ def main(argv = None): logging.info('Guessing target processor is a %s/%s' % ( options.arch, options.cpu)) else: + cpu_from_user = options.cpu (options.arch, options.cpu) = canon_processor(archinfo, options.cpu) - logging.debug('Canonicalizized --cpu to %s/%s' % ( - options.arch, options.cpu)) + logging.info('Canonicalizized --cpu=%s to %s/%s' % ( + cpu_from_user, options.arch, options.cpu)) logging.info('Target is %s-%s-%s-%s' % ( options.compiler, options.os, options.arch, options.cpu)) @@ -1164,7 +1226,9 @@ def main(argv = None): else: options.with_tr1 = 'none' - modules_to_use = choose_modules_to_use(options, modules) + modules_to_use = choose_modules_to_use(modules, + archinfo[options.arch], + options) build_config = BuildConfigurationInformation(options, modules_to_use) build_config.headers.append( diff --git a/doc/log.txt b/doc/log.txt index 97e40db5e..75fde9a5f 100644 --- a/doc/log.txt +++ b/doc/log.txt @@ -1,4 +1,7 @@ +* 1.9.3-dev, ????-??-?? + - Set macros for available SIMD instructions in build.h + * 1.9.2, 2009-11-03 - Add SIMD version of XTEA - Support both SSE2 and AltiVec SIMD for Serpent and XTEA diff --git a/readme.txt b/readme.txt index a192a1e1e..44f1f471c 100644 --- a/readme.txt +++ b/readme.txt @@ -1,4 +1,4 @@ -Botan 1.9.2, 2009-11-03 +Botan 1.9.3-dev, ????-??-?? Botan is a C++ class library for performing a wide variety of cryptographic operations. diff --git a/src/block/aes_intel/aes_intel.cpp b/src/block/aes_intel/aes_intel.cpp new file mode 100644 index 000000000..057728e72 --- /dev/null +++ b/src/block/aes_intel/aes_intel.cpp @@ -0,0 +1,177 @@ +/** +* AES +* (C) 1999-2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include <botan/aes_intel.h> +#include <wmmintrin.h> + +namespace Botan { + +namespace { + +__m128i aes_128_key_expansion(__m128i key, __m128i key_with_rcon) + { + key_with_rcon = _mm_shuffle_epi32(key_with_rcon, _MM_SHUFFLE(3,3,3,3)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + return _mm_xor_si128(key, key_with_rcon); + } + +} + +/** +* AES Encryption +*/ +void AES_128_Intel::encrypt_n(const byte in[], byte out[], u32bit blocks) const + { + const __m128i* in_mm = (const __m128i*)in; + __m128i* out_mm = (__m128i*)out; + + const __m128i* key_mm = (const __m128i*)&EK[0]; + + __m128i K0 = _mm_loadu_si128(key_mm); + __m128i K1 = _mm_loadu_si128(key_mm + 1); + __m128i K2 = _mm_loadu_si128(key_mm + 2); + __m128i K3 = _mm_loadu_si128(key_mm + 3); + __m128i K4 = _mm_loadu_si128(key_mm + 4); + __m128i K5 = _mm_loadu_si128(key_mm + 5); + __m128i K6 = _mm_loadu_si128(key_mm + 6); + __m128i K7 = _mm_loadu_si128(key_mm + 7); + __m128i K8 = _mm_loadu_si128(key_mm + 8); + __m128i K9 = _mm_loadu_si128(key_mm + 9); + __m128i K10 = _mm_loadu_si128(key_mm + 10); + + for(u32bit i = 0; i != blocks; ++i) + { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesenc_si128(B, K1); + B = _mm_aesenc_si128(B, K2); + B = _mm_aesenc_si128(B, K3); + B = _mm_aesenc_si128(B, K4); + B = _mm_aesenc_si128(B, K5); + B = _mm_aesenc_si128(B, K6); + B = _mm_aesenc_si128(B, K7); + B = _mm_aesenc_si128(B, K8); + B = _mm_aesenc_si128(B, K9); + B = _mm_aesenclast_si128(B, K10); + + _mm_storeu_si128(out_mm + i, B); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/** +* AES Decryption +*/ +void AES_128_Intel::decrypt_n(const byte in[], byte out[], u32bit blocks) const + { + const __m128i* in_mm = (const __m128i*)in; + __m128i* out_mm = (__m128i*)out; + + const __m128i* key_mm = (const __m128i*)&DK[0]; + + __m128i K0 = _mm_loadu_si128(key_mm); + __m128i K1 = _mm_loadu_si128(key_mm + 1); + __m128i K2 = _mm_loadu_si128(key_mm + 2); + __m128i K3 = _mm_loadu_si128(key_mm + 3); + __m128i K4 = _mm_loadu_si128(key_mm + 4); + __m128i K5 = _mm_loadu_si128(key_mm + 5); + __m128i K6 = _mm_loadu_si128(key_mm + 6); + __m128i K7 = _mm_loadu_si128(key_mm + 7); + __m128i K8 = _mm_loadu_si128(key_mm + 8); + __m128i K9 = _mm_loadu_si128(key_mm + 9); + __m128i K10 = _mm_loadu_si128(key_mm + 10); + + for(u32bit i = 0; i != blocks; ++i) + { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesdec_si128(B, K1); + B = _mm_aesdec_si128(B, K2); + B = _mm_aesdec_si128(B, K3); + B = _mm_aesdec_si128(B, K4); + B = _mm_aesdec_si128(B, K5); + B = _mm_aesdec_si128(B, K6); + B = _mm_aesdec_si128(B, K7); + B = _mm_aesdec_si128(B, K8); + B = _mm_aesdec_si128(B, K9); + B = _mm_aesdeclast_si128(B, K10); + + _mm_storeu_si128(out_mm + i, B); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/** +* AES Key Schedule +*/ +void AES_128_Intel::key_schedule(const byte key[], u32bit length) + { + + #define AES_128_key_exp(K, RCON) \ + aes_128_key_expansion(K, _mm_aeskeygenassist_si128(K, RCON)) + + __m128i K0 = _mm_loadu_si128((const __m128i*)key); + __m128i K1 = AES_128_key_exp(K0, 0x01); + __m128i K2 = AES_128_key_exp(K1, 0x02); + __m128i K3 = AES_128_key_exp(K2, 0x04); + __m128i K4 = AES_128_key_exp(K3, 0x08); + __m128i K5 = AES_128_key_exp(K4, 0x10); + __m128i K6 = AES_128_key_exp(K5, 0x20); + __m128i K7 = AES_128_key_exp(K6, 0x40); + __m128i K8 = AES_128_key_exp(K7, 0x80); + __m128i K9 = AES_128_key_exp(K8, 0x1B); + __m128i K10 = AES_128_key_exp(K9, 0x36); + + __m128i* EK_mm = (__m128i*)&EK[0]; + _mm_storeu_si128(EK_mm , K0); + _mm_storeu_si128(EK_mm + 1, K1); + _mm_storeu_si128(EK_mm + 2, K2); + _mm_storeu_si128(EK_mm + 3, K3); + _mm_storeu_si128(EK_mm + 4, K4); + _mm_storeu_si128(EK_mm + 5, K5); + _mm_storeu_si128(EK_mm + 6, K6); + _mm_storeu_si128(EK_mm + 7, K7); + _mm_storeu_si128(EK_mm + 8, K8); + _mm_storeu_si128(EK_mm + 9, K9); + _mm_storeu_si128(EK_mm + 10, K10); + + // Now generate decryption keys + + __m128i* DK_mm = (__m128i*)&DK[0]; + _mm_storeu_si128(DK_mm , K10); + _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K9)); + _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K8)); + _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K7)); + _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K6)); + _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K5)); + _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K4)); + _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(K3)); + _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K2)); + _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K1)); + _mm_storeu_si128(DK_mm + 10, K0); + } + +/** +* Clear memory of sensitive data +*/ +void AES_128_Intel::clear() + { + EK.clear(); + DK.clear(); + } + +} diff --git a/src/block/aes_intel/aes_intel.h b/src/block/aes_intel/aes_intel.h new file mode 100644 index 000000000..90270939c --- /dev/null +++ b/src/block/aes_intel/aes_intel.h @@ -0,0 +1,34 @@ +/** +* AES using Intel's AES instructions +* (C) 1999-2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_AES_INTEL_H__ +#define BOTAN_AES_INTEL_H__ + +#include <botan/block_cipher.h> + +namespace Botan { + +class BOTAN_DLL AES_128_Intel : public BlockCipher + { + public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + + void clear(); + std::string name() const { return "AES-128"; } + BlockCipher* clone() const { return new AES_128_Intel; } + + AES_128_Intel() : BlockCipher(16, 16) { } + private: + void key_schedule(const byte[], u32bit); + + SecureBuffer<u32bit, 44> EK, DK; + }; + +} + +#endif diff --git a/src/block/aes_intel/info.txt b/src/block/aes_intel/info.txt new file mode 100644 index 000000000..6e67a6ed9 --- /dev/null +++ b/src/block/aes_intel/info.txt @@ -0,0 +1,9 @@ +define AES_INTEL + +load_on auto + +need_isa aes_ni + +<requires> +aes_isa_eng +</requires> diff --git a/src/build-data/arch/alpha.txt b/src/build-data/arch/alpha.txt index b1d939ed1..2bf72edef 100644 --- a/src/build-data/arch/alpha.txt +++ b/src/build-data/arch/alpha.txt @@ -1,4 +1,3 @@ -default_submodel alpha-ev4 endian little <aliases> diff --git a/src/build-data/arch/amd64.txt b/src/build-data/arch/amd64.txt index 96da0e3a9..6c1c2a7e4 100644 --- a/src/build-data/arch/amd64.txt +++ b/src/build-data/arch/amd64.txt @@ -1,5 +1,3 @@ -default_submodel opteron - endian little unaligned ok @@ -9,9 +7,12 @@ x86_64 # for RPM </aliases> <submodels> -opteron +k8 +k10 em64t core2 +nehalem +westmere </submodels> <submodel_aliases> @@ -19,7 +20,15 @@ core2duo -> core2 intelcore2 -> core2 intelcore2duo -> core2 -amdopteron -> opteron -athlon64 -> opteron -k8 -> opteron +sledgehammer -> k8 +opteron -> k8 +amdopteron -> k8 +athlon64 -> k8 +barcelona -> k10 </submodel_aliases> + +<isa_extn> +sse2:all +ssse3:core2,nehalem,westmere +aes_ni:westmere +</isa_extn> diff --git a/src/build-data/arch/arm.txt b/src/build-data/arch/arm.txt index 5f05d4cad..77f15b1d9 100644 --- a/src/build-data/arch/arm.txt +++ b/src/build-data/arch/arm.txt @@ -1,5 +1,3 @@ -default_submodel arm2 - <submodels> arm2 arm3 @@ -11,6 +9,8 @@ strongarm strongarm110 strongarm1100 xscale +cortex-a8 +cortex-a9 </submodels> <submodel_aliases> @@ -19,3 +19,7 @@ sa1100 -> strongarm1100 strongarm1110 -> strongarm1100 armv5tel -> xscale </submodel_aliases> + +<isa_extn> +neon:cortex-a8,cortex-a9 +</isa_extn> diff --git a/src/build-data/arch/hitachi-sh.txt b/src/build-data/arch/hitachi-sh.txt index 85a741f59..bab84b48f 100644 --- a/src/build-data/arch/hitachi-sh.txt +++ b/src/build-data/arch/hitachi-sh.txt @@ -1,5 +1,3 @@ -default_submodel hitachi-sh1 - <submodels> hitachi-sh1 hitachi-sh2 diff --git a/src/build-data/arch/hppa.txt b/src/build-data/arch/hppa.txt index 67bca263d..0f7d90c68 100644 --- a/src/build-data/arch/hppa.txt +++ b/src/build-data/arch/hppa.txt @@ -1,5 +1,3 @@ -default_submodel hppa1.0 - <aliases> hp-pa parisc diff --git a/src/build-data/arch/ia32.txt b/src/build-data/arch/ia32.txt index 0fe665e68..40066851d 100644 --- a/src/build-data/arch/ia32.txt +++ b/src/build-data/arch/ia32.txt @@ -1,5 +1,3 @@ -default_submodel i386 - endian little unaligned ok @@ -22,6 +20,7 @@ pentium-m prescott k6 athlon +atom </submodels> <submodel_aliases> @@ -56,3 +55,8 @@ intelcput2500 -> prescott intelcput2600 -> prescott intelcput2700 -> prescott </submodel_aliases> + +<isa_extn> +sse2:pentium4,prescott,pentium-m,atom +ssse3:atom +</isa_extn> diff --git a/src/build-data/arch/ia64.txt b/src/build-data/arch/ia64.txt index 65309f0ff..55967d5ab 100644 --- a/src/build-data/arch/ia64.txt +++ b/src/build-data/arch/ia64.txt @@ -1,13 +1,14 @@ -# This is safe: only affects tuning, not ISA -default_submodel itanium2 - <aliases> -merced itanium +itanic </aliases> <submodels> -itanium1 -itanium2 +merced mckinley +montvale </submodels> + +<submodel_aliases> +itanium2 -> mckinley +</submodel_aliases> diff --git a/src/build-data/arch/m68k.txt b/src/build-data/arch/m68k.txt index 3a8b5e8b3..691c45b92 100644 --- a/src/build-data/arch/m68k.txt +++ b/src/build-data/arch/m68k.txt @@ -1,5 +1,3 @@ -default_submodel 68020 - endian big # Except for Coldfire diff --git a/src/build-data/arch/mips32.txt b/src/build-data/arch/mips32.txt index ec9d4b5bf..b69abdacb 100644 --- a/src/build-data/arch/mips32.txt +++ b/src/build-data/arch/mips32.txt @@ -1,5 +1,3 @@ -default_submodel r3000 - <aliases> mips mipsel # For Debian diff --git a/src/build-data/arch/mips64.txt b/src/build-data/arch/mips64.txt index 666ba7e18..a2fd5849b 100644 --- a/src/build-data/arch/mips64.txt +++ b/src/build-data/arch/mips64.txt @@ -1,5 +1,3 @@ -default_submodel r4400 - <submodels> r4000 r4100 diff --git a/src/build-data/arch/ppc.txt b/src/build-data/arch/ppc.txt index 254643fdd..811eb53e7 100644 --- a/src/build-data/arch/ppc.txt +++ b/src/build-data/arch/ppc.txt @@ -1,8 +1,6 @@ endian big unaligned ok -default_submodel ppc604 - <aliases> powerpc </aliases> @@ -21,3 +19,7 @@ ppc750 ppc7400 ppc7450 </submodels> + +<isa_extn> +altivec:ppc7400,ppc7450 +</isa_extn> diff --git a/src/build-data/arch/ppc64.txt b/src/build-data/arch/ppc64.txt index f044ba98d..b99aa9b8b 100644 --- a/src/build-data/arch/ppc64.txt +++ b/src/build-data/arch/ppc64.txt @@ -1,7 +1,5 @@ endian big -default_submodel power4 - <aliases> powerpc64 </aliases> @@ -16,9 +14,15 @@ ppc970 power3 power4 power5 +power6 +power7 cellppu </submodels> <submodel_aliases> cellbroadbandengine -> cellppu </submodel_aliases> + +<isa_extn> +altivec:cellppu,ppc970,power6,power7 +</isa_extn> diff --git a/src/build-data/arch/s390.txt b/src/build-data/arch/s390.txt index 8024a4315..6e9f99f12 100644 --- a/src/build-data/arch/s390.txt +++ b/src/build-data/arch/s390.txt @@ -1,5 +1,3 @@ -default_submodel s390 - endian big unaligned ok diff --git a/src/build-data/arch/s390x.txt b/src/build-data/arch/s390x.txt index 00daab8b4..6f4271607 100644 --- a/src/build-data/arch/s390x.txt +++ b/src/build-data/arch/s390x.txt @@ -1,5 +1,3 @@ -default_submodel s390x - endian big unaligned ok diff --git a/src/build-data/arch/sparc32.txt b/src/build-data/arch/sparc32.txt index 57b19c519..ae75a2a4b 100644 --- a/src/build-data/arch/sparc32.txt +++ b/src/build-data/arch/sparc32.txt @@ -1,7 +1,3 @@ -# V7 doesn't have integer multiply, so it will be bitterly slow for some things -# (especially BigInt). Also, it's fairly rare nowadays, so we default to V8. -default_submodel sparc32-v8 - endian big <aliases> diff --git a/src/build-data/arch/sparc64.txt b/src/build-data/arch/sparc64.txt index e308055fa..8f62e66f6 100644 --- a/src/build-data/arch/sparc64.txt +++ b/src/build-data/arch/sparc64.txt @@ -1,5 +1,3 @@ -default_submodel sparc64-ultra - <submodels> sparc64-ultra sparc64-ultra2 diff --git a/src/build-data/cc/gcc.txt b/src/build-data/cc/gcc.txt index 370bb84d7..f25f81048 100644 --- a/src/build-data/cc/gcc.txt +++ b/src/build-data/cc/gcc.txt @@ -52,6 +52,9 @@ sparc64-ultra3 -> "-mcpu=v9 -mtune=ultrasparc3" em64t -> "-march=nocona -momit-leaf-frame-pointer" cellppu -> "" +nehalem -> "-march=core2 -msse4.1 -msse4.2" +westmere -> "-march=core2 -maes -msse4.1 -msse4.2" + # Default family options (SUBMODEL is substitued with the real submodel) # Anything after the quotes is what should be *removed* from the submodel name # before it's put into SUBMODEL. diff --git a/src/engine/aes_isa_eng/aes_isa_engine.cpp b/src/engine/aes_isa_eng/aes_isa_engine.cpp new file mode 100644 index 000000000..fa3b4ceab --- /dev/null +++ b/src/engine/aes_isa_eng/aes_isa_engine.cpp @@ -0,0 +1,54 @@ +/* +* Engine for AES instructions +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include <botan/aes_isa_engine.h> +#include <botan/cpuid.h> + +#if defined(BOTAN_HAS_AES_INTEL) + #include <botan/aes_intel.h> +#endif + +#if defined(BOTAN_HAS_AES_VIA) + #include <botan/aes_via.h> +#endif + +namespace Botan { + +BlockCipher* +AES_ISA_Engine::find_block_cipher(const SCAN_Name& request, + Algorithm_Factory&) const + { +#if defined(BOTAN_HAS_AES_INTEL) + if(CPUID::has_intel_aes()) + { + if(request.algo_name() == "AES-128") + return new AES_128_Intel; + /* + if(request.algo_name() == "AES-192") + return new AES_192_Intel; + if(request.algo_name() == "AES-256") + return new AES_256_Intel; + */ + } +#endif + +#if defined(BOTAN_HAS_AES_VIA) + if(CPUID::has_via_aes()) + { + if(request.algo_name() == "AES-128") + return new AES_128_VIA; + if(request.algo_name() == "AES-192") + return new AES_192_VIA; + if(request.algo_name() == "AES-256") + return new AES_256_VIA; + } +#endif + + return 0; + } + +} diff --git a/src/engine/aes_isa_eng/aes_isa_engine.h b/src/engine/aes_isa_eng/aes_isa_engine.h new file mode 100644 index 000000000..602a114a9 --- /dev/null +++ b/src/engine/aes_isa_eng/aes_isa_engine.h @@ -0,0 +1,26 @@ +/** +* Engine for AES instructions +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_AES_ISA_ENGINE_H__ +#define BOTAN_AES_ISA_ENGINE_H__ + +#include <botan/engine.h> + +namespace Botan { + +class BOTAN_DLL AES_ISA_Engine : public Engine + { + public: + std::string provider_name() const { return "aes_isa"; } + private: + BlockCipher* find_block_cipher(const SCAN_Name&, + Algorithm_Factory&) const; + }; + +} + +#endif diff --git a/src/engine/aes_isa_eng/info.txt b/src/engine/aes_isa_eng/info.txt new file mode 100644 index 000000000..c0695aaf3 --- /dev/null +++ b/src/engine/aes_isa_eng/info.txt @@ -0,0 +1,3 @@ +define ENGINE_AES_ISA + +load_on dep diff --git a/src/hash/sha1_sse2/info.txt b/src/hash/sha1_sse2/info.txt index ee61076b4..7a380753d 100644 --- a/src/hash/sha1_sse2/info.txt +++ b/src/hash/sha1_sse2/info.txt @@ -1,19 +1,8 @@ define SHA1_SSE2 +need_isa sse2 + <requires> sha1 simd_engine </requires> - -<arch> -pentium-m -pentium4 -prescott -amd64 -</arch> - -<cc> -gcc -icc -msvc -</cc> diff --git a/src/libstate/libstate.cpp b/src/libstate/libstate.cpp index c78bce62d..8b039a97a 100644 --- a/src/libstate/libstate.cpp +++ b/src/libstate/libstate.cpp @@ -37,6 +37,10 @@ #include <botan/eng_amd64.h> #endif +#if defined(BOTAN_HAS_ENGINE_AES_ISA) + #include <botan/aes_isa_engine.h> +#endif + #if defined(BOTAN_HAS_ENGINE_SIMD) #include <botan/simd_engine.h> #endif @@ -288,6 +292,10 @@ void Library_State::initialize(bool thread_safe) engines.push_back(new OpenSSL_Engine); #endif +#if defined(BOTAN_HAS_ENGINE_AES_ISA) + engines.push_back(new AES_ISA_Engine); +#endif + #if defined(BOTAN_HAS_ENGINE_SIMD) engines.push_back(new SIMD_Engine); #endif diff --git a/src/timer/cpu_counter/info.txt b/src/timer/cpu_counter/info.txt index d95e0fec5..2ab1343bc 100644 --- a/src/timer/cpu_counter/info.txt +++ b/src/timer/cpu_counter/info.txt @@ -12,10 +12,12 @@ gcc </cc> <arch> + # RDTSC: Pentium and up i586 i686 athlon +pentium3 pentium4 pentium-m amd64 diff --git a/src/utils/loadstor.h b/src/utils/loadstor.h index 8f430f36c..489a789f4 100644 --- a/src/utils/loadstor.h +++ b/src/utils/loadstor.h @@ -14,7 +14,7 @@ #include <botan/rotate.h> #include <botan/prefetch.h> -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK #if defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN) @@ -101,7 +101,7 @@ inline T load_le(const byte in[], u32bit off) template<> inline u16bit load_be<u16bit>(const byte in[], u32bit off) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u16bit*>(in) + off)); #else in += off * sizeof(u16bit); @@ -112,7 +112,7 @@ inline u16bit load_be<u16bit>(const byte in[], u32bit off) template<> inline u16bit load_le<u16bit>(const byte in[], u32bit off) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u16bit*>(in) + off)); #else in += off * sizeof(u16bit); @@ -123,7 +123,7 @@ inline u16bit load_le<u16bit>(const byte in[], u32bit off) template<> inline u32bit load_be<u32bit>(const byte in[], u32bit off) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u32bit*>(in) + off)); #else in += off * sizeof(u32bit); @@ -134,7 +134,7 @@ inline u32bit load_be<u32bit>(const byte in[], u32bit off) template<> inline u32bit load_le<u32bit>(const byte in[], u32bit off) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u32bit*>(in) + off)); #else in += off * sizeof(u32bit); @@ -145,7 +145,7 @@ inline u32bit load_le<u32bit>(const byte in[], u32bit off) template<> inline u64bit load_be<u64bit>(const byte in[], u32bit off) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u64bit*>(in) + off)); #else in += off * sizeof(u64bit); @@ -157,7 +157,7 @@ inline u64bit load_be<u64bit>(const byte in[], u32bit off) template<> inline u64bit load_le<u64bit>(const byte in[], u32bit off) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u64bit*>(in) + off)); #else in += off * sizeof(u64bit); @@ -281,7 +281,7 @@ inline void load_be(T out[], */ inline void store_be(u16bit in, byte out[2]) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK *reinterpret_cast<u16bit*>(out) = BOTAN_ENDIAN_B2N(in); #else out[0] = get_byte(0, in); @@ -291,7 +291,7 @@ inline void store_be(u16bit in, byte out[2]) inline void store_le(u16bit in, byte out[2]) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK *reinterpret_cast<u16bit*>(out) = BOTAN_ENDIAN_L2N(in); #else out[0] = get_byte(1, in); @@ -301,7 +301,7 @@ inline void store_le(u16bit in, byte out[2]) inline void store_be(u32bit in, byte out[4]) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK *reinterpret_cast<u32bit*>(out) = BOTAN_ENDIAN_B2N(in); #else out[0] = get_byte(0, in); @@ -313,7 +313,7 @@ inline void store_be(u32bit in, byte out[4]) inline void store_le(u32bit in, byte out[4]) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK *reinterpret_cast<u32bit*>(out) = BOTAN_ENDIAN_L2N(in); #else out[0] = get_byte(3, in); @@ -325,7 +325,7 @@ inline void store_le(u32bit in, byte out[4]) inline void store_be(u64bit in, byte out[8]) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK *reinterpret_cast<u64bit*>(out) = BOTAN_ENDIAN_B2N(in); #else out[0] = get_byte(0, in); @@ -341,7 +341,7 @@ inline void store_be(u64bit in, byte out[8]) inline void store_le(u64bit in, byte out[8]) { -#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK +#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK *reinterpret_cast<u64bit*>(out) = BOTAN_ENDIAN_L2N(in); #else out[0] = get_byte(7, in); diff --git a/src/utils/simd_32/info.txt b/src/utils/simd_32/info.txt index 64707c1e4..883096a5d 100644 --- a/src/utils/simd_32/info.txt +++ b/src/utils/simd_32/info.txt @@ -1,16 +1,3 @@ define SIMD_32 load_on always - -<arch> -pentium-m -pentium4 -prescott -amd64 -</arch> - -<cc> -gcc -icc -msvc -</cc> diff --git a/src/utils/xor_buf.h b/src/utils/xor_buf.h index 39781f017..39c4a493d 100644 --- a/src/utils/xor_buf.h +++ b/src/utils/xor_buf.h @@ -22,7 +22,7 @@ inline void xor_buf(byte out[], const byte in[], u32bit length) { while(length >= 8) { -#if BOTAN_UNALIGNED_LOADSTOR_OK +#if BOTAN_UNALIGNED_MEMORY_ACCESS_OK *reinterpret_cast<u64bit*>(out) ^= *reinterpret_cast<const u64bit*>(in); #else out[0] ^= in[0]; out[1] ^= in[1]; @@ -51,7 +51,7 @@ inline void xor_buf(byte out[], { while(length >= 8) { -#if BOTAN_UNALIGNED_LOADSTOR_OK +#if BOTAN_UNALIGNED_MEMORY_ACCESS_OK *reinterpret_cast<u64bit*>(out) = *reinterpret_cast<const u64bit*>(in) ^ *reinterpret_cast<const u64bit*>(in2); |