aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-x configure.py 138
-rw-r--r-- doc/log.txt 3
-rw-r--r-- readme.txt 2
-rw-r--r-- src/block/aes_intel/aes_intel.cpp 177
-rw-r--r-- src/block/aes_intel/aes_intel.h 34
-rw-r--r-- src/block/aes_intel/info.txt 9
-rw-r--r-- src/build-data/arch/alpha.txt 1
-rw-r--r-- src/build-data/arch/amd64.txt 21
-rw-r--r-- src/build-data/arch/arm.txt 8
-rw-r--r-- src/build-data/arch/hitachi-sh.txt 2
-rw-r--r-- src/build-data/arch/hppa.txt 2
-rw-r--r-- src/build-data/arch/ia32.txt 8
-rw-r--r-- src/build-data/arch/ia64.txt 13
-rw-r--r-- src/build-data/arch/m68k.txt 2
-rw-r--r-- src/build-data/arch/mips32.txt 2
-rw-r--r-- src/build-data/arch/mips64.txt 2
-rw-r--r-- src/build-data/arch/ppc.txt 6
-rw-r--r-- src/build-data/arch/ppc64.txt 8
-rw-r--r-- src/build-data/arch/s390.txt 2
-rw-r--r-- src/build-data/arch/s390x.txt 2
-rw-r--r-- src/build-data/arch/sparc32.txt 4
-rw-r--r-- src/build-data/arch/sparc64.txt 2
-rw-r--r-- src/build-data/cc/gcc.txt 3
-rw-r--r-- src/engine/aes_isa_eng/aes_isa_engine.cpp 54
-rw-r--r-- src/engine/aes_isa_eng/aes_isa_engine.h 26
-rw-r--r-- src/engine/aes_isa_eng/info.txt 3
-rw-r--r-- src/hash/sha1_sse2/info.txt 15
-rw-r--r-- src/libstate/libstate.cpp 8
-rw-r--r-- src/timer/cpu_counter/info.txt 2
-rw-r--r-- src/utils/loadstor.h 26
-rw-r--r-- src/utils/simd_32/info.txt 13
-rw-r--r-- src/utils/xor_buf.h 4
32 files changed, 482 insertions, 120 deletions
diff --git a/configure.py b/configure.py
index 93192abdd..5e2e4004d 100755
--- a/configure.py
+++ b/configure.py
@@ -37,9 +37,9 @@ class BuildConfigurationInformation(object):
"""
version_major = 1
version_minor = 9
- version_patch = 2
- version_so_patch = 2
- version_suffix = ''
+ version_patch = 3
+ version_so_patch = 3
+ version_suffix = '-dev'
version_string = '%d.%d.%d%s' % (
version_major, version_minor, version_patch, version_suffix)
@@ -78,7 +78,8 @@ class BuildConfigurationInformation(object):
if file.endswith('.cpp')])
self.python_sources = sorted(
- [os.path.join(self.python_dir, file) for file in os.listdir(self.python_dir)
+ [os.path.join(self.python_dir, file)
+ for file in os.listdir(self.python_dir)
if file.endswith('.cpp')])
def doc_files(self):
@@ -142,6 +143,11 @@ def process_command_line(args):
dest='unaligned_mem', action='store_false',
help=SUPPRESS_HELP)
+ target_group.add_option('--with-isa-extension', metavar='ISALIST',
+ dest='with_isa_extns',
+ action='append', default=[],
+ help='enable ISA extensions (sse2, altivec, ...)')
+
build_group = OptionGroup(parser, 'Build options')
build_group.add_option('--enable-shared', dest='build_shared_lib',
@@ -270,11 +276,13 @@ def process_command_line(args):
raise Exception('Bad value to --with-endian "%s"' % (
options.with_endian))
- def parse_module_opts(modules):
+ def parse_multiple_enable(modules):
return sorted(set(sum([s.split(',') for s in modules], [])))
- options.enabled_modules = parse_module_opts(options.enabled_modules)
- options.disabled_modules = parse_module_opts(options.disabled_modules)
+ options.enabled_modules = parse_multiple_enable(options.enabled_modules)
+ options.disabled_modules = parse_multiple_enable(options.disabled_modules)
+
+ options.with_isa_extns = parse_multiple_enable(options.with_isa_extns)
return options
@@ -361,6 +369,7 @@ class ModuleInfo(object):
'define': None,
'modset': None,
'uses_tr1': 'false',
+ 'need_isa': None,
'note': '',
'mp_bits': 0 })
@@ -395,8 +404,21 @@ class ModuleInfo(object):
else:
self.uses_tr1 = False
- def compatible_cpu(self, arch, cpu):
- return self.arch == [] or (arch in self.arch or cpu in self.arch)
+    def compatible_cpu(self, archinfo, options):
+        """
+        Return True if this module can be built for the target CPU.
+
+        archinfo supplies the architecture name (basename) and the
+        ISA extensions of a CPU (isa_extensions_in); options supplies
+        the target cpu name and the user's --with-isa-extension list
+        (with_isa_extns).
+        """
+        arch_name = archinfo.basename
+        cpu_name = options.cpu
+
+        # An empty arch list means the module is not arch restricted
+        if self.arch and \
+           arch_name not in self.arch and cpu_name not in self.arch:
+            return False
+
+        # No ISA requirement: nothing further to check
+        if self.need_isa is None:
+            return True
+
+        # The extension is usable if the CPU is known to have it, or
+        # if the user explicitly enabled it on the command line
+        if self.need_isa in archinfo.isa_extensions_in(cpu_name):
+            return True
+        return self.need_isa in options.with_isa_extns
def compatible_os(self, os):
return self.os == [] or os in self.os
@@ -423,12 +445,21 @@ class ModuleInfo(object):
class ArchInfo(object):
def __init__(self, infofile):
lex_me_harder(infofile, self,
- ['aliases', 'submodels', 'submodel_aliases'],
- { 'default_submodel': None,
- 'endian': None,
+ ['aliases', 'submodels', 'submodel_aliases', 'isa_extn'],
+ { 'endian': None,
'unaligned': 'no'
})
+        def convert_isa_list(input):
+            """
+            Build a map from CPU name to the ISA extensions listed for it.
+
+            Each entry of input has the form 'isa:cpu1,cpu2,...'. The
+            result maps every cpu name to the list of isa names it was
+            listed under, in input order.
+            """
+            isa_info = {}
+            # Read the argument itself; previously the parameter was
+            # ignored and self.isa_extn was picked up from the closure
+            for line in input:
+                (isa, cpus) = line.split(':')
+                for cpu in cpus.split(','):
+                    isa_info.setdefault(cpu, []).append(isa)
+            return isa_info
+
+ self.isa_extn = convert_isa_list(self.isa_extn)
+
self.submodel_aliases = force_to_dict(self.submodel_aliases)
if self.unaligned == 'ok':
@@ -436,35 +467,53 @@ class ArchInfo(object):
else:
self.unaligned_ok = 0
+ """
+ Return ISA extensions specific to this CPU
+ """
+    def isa_extensions_in(self, cpu_type):
+        # Extensions listed for this exact CPU, plus those listed
+        # under the 'all' key, returned as one sorted list
+        cpu_specific = self.isa_extn.get(cpu_type, [])
+        arch_wide = self.isa_extn.get('all', [])
+        return sorted(cpu_specific + arch_wide)
+
+ """
+ Return a list of all submodels for this arch
+ """
def all_submodels(self):
return sorted(zip(self.submodels, self.submodels) +
self.submodel_aliases.items(),
key = lambda k: len(k[0]), reverse = True)
- def defines(self, target_submodel, with_endian, unaligned_ok):
+ """
+ Return CPU-specific defines for build.h
+ """
+ def defines(self, options):
macros = ['TARGET_ARCH_IS_%s' % (self.basename.upper())]
def form_cpu_macro(cpu_name):
return cpu_name.upper().replace('.', '').replace('-', '_')
- if self.basename != target_submodel:
- macros.append('TARGET_CPU_IS_%s' % (
- form_cpu_macro(target_submodel)))
+ if self.basename != options.cpu:
+ macros.append('TARGET_CPU_IS_%s' % (form_cpu_macro(options.cpu)))
+
+ isa_extensions = sorted(set(
+ sum([self.isa_extensions_in(options.cpu),
+ options.with_isa_extns],
+ [])))
- if with_endian:
- macros.append('TARGET_CPU_IS_%s_ENDIAN' % (with_endian.upper()))
- elif self.endian != None:
- macros.append('TARGET_CPU_IS_%s_ENDIAN' % (self.endian.upper()))
+ for simd in isa_extensions:
+ macros.append('TARGET_CPU_HAS_%s' % (simd.upper()))
+ endian = options.with_endian or self.endian
+
+ if endian != None:
+ macros.append('TARGET_CPU_IS_%s_ENDIAN' % (endian.upper()))
+
+ unaligned_ok = options.unaligned_mem
if unaligned_ok is None:
unaligned_ok = self.unaligned_ok
+ if unaligned_ok:
+ logging.info('Assuming unaligned memory access works')
- if unaligned_ok:
- logging.info('Assuming unaligned memory access works on this CPU')
- macros.append('TARGET_UNALIGNED_LOADSTOR_OK %d' % (unaligned_ok))
-
- if self.basename == 'amd64':
- macros.append('TARGET_CPU_HAS_SSE2')
+ macros.append('TARGET_UNALIGNED_MEMORY_ACCESS_OK %d' % (unaligned_ok))
return macros
@@ -514,7 +563,9 @@ class CompilerInfo(object):
del self.mach_opt
-
+ """
+ Return the machine specific ABI flags
+ """
def mach_abi_link_flags(self, osname, arch, submodel):
abi_link = set()
@@ -526,6 +577,9 @@ class CompilerInfo(object):
return ''
return ' ' + ' '.join(abi_link)
+ """
+ Return the flags for MACH_OPT
+ """
def mach_opts(self, arch, submodel):
def submodel_fixup(tup):
@@ -541,6 +595,9 @@ class CompilerInfo(object):
return ''
+ """
+ Return the flags for LIB_OPT
+ """
def library_opt_flags(self, debug_build):
flags = self.lib_opt_flags
if debug_build and self.debug_flags != '':
@@ -549,11 +606,17 @@ class CompilerInfo(object):
flags += ' ' + self.no_debug_flags
return flags
+ """
+ Return the command needed to link a shared object
+ """
def so_link_command_for(self, osname):
if osname in self.so_link_flags:
return self.so_link_flags[osname]
return self.so_link_flags['default']
+ """
+ Return defines for build.h
+ """
def defines(self, with_tr1):
def tr1_macro():
@@ -783,9 +846,7 @@ def create_template_vars(build_config, options, modules, cc, arch, osinfo):
'target_compiler_defines': make_cpp_macros(
cc.defines(options.with_tr1)),
- 'target_cpu_defines': make_cpp_macros(
- arch.defines(options.cpu, options.with_endian,
- options.unaligned_mem)),
+ 'target_cpu_defines': make_cpp_macros(arch.defines(options)),
'include_files': makefile_list(build_config.headers),
@@ -840,7 +901,7 @@ def create_template_vars(build_config, options, modules, cc, arch, osinfo):
"""
Determine which modules to load based on options, target, etc
"""
-def choose_modules_to_use(options, modules):
+def choose_modules_to_use(modules, archinfo, options):
to_load = []
maybe_dep = []
@@ -855,7 +916,7 @@ def choose_modules_to_use(options, modules):
elif modname in options.enabled_modules:
to_load.append(modname) # trust the user
- elif not module.compatible_cpu(options.arch, options.cpu):
+ elif not module.compatible_cpu(archinfo, options):
cannot_use_because(modname, 'CPU incompatible')
elif not module.compatible_os(options.os):
cannot_use_because(modname, 'OS incompatible')
@@ -1047,8 +1108,8 @@ def setup_build(build_config, options, template_vars):
os.path.join(build_config.build_dir, sink)
if options.boost_python:
- templates_to_proc[
- os.path.join(options.makefile_dir, 'python.in')] = 'Makefile.python'
+ template = os.path.join(options.makefile_dir, 'python.in')
+ templates_to_proc[template] = 'Makefile.python'
for (template, sink) in templates_to_proc.items():
try:
@@ -1123,9 +1184,10 @@ def main(argv = None):
logging.info('Guessing target processor is a %s/%s' % (
options.arch, options.cpu))
else:
+ cpu_from_user = options.cpu
(options.arch, options.cpu) = canon_processor(archinfo, options.cpu)
- logging.debug('Canonicalizized --cpu to %s/%s' % (
- options.arch, options.cpu))
+ logging.info('Canonicalizized --cpu=%s to %s/%s' % (
+ cpu_from_user, options.arch, options.cpu))
logging.info('Target is %s-%s-%s-%s' % (
options.compiler, options.os, options.arch, options.cpu))
@@ -1164,7 +1226,9 @@ def main(argv = None):
else:
options.with_tr1 = 'none'
- modules_to_use = choose_modules_to_use(options, modules)
+ modules_to_use = choose_modules_to_use(modules,
+ archinfo[options.arch],
+ options)
build_config = BuildConfigurationInformation(options, modules_to_use)
build_config.headers.append(
diff --git a/doc/log.txt b/doc/log.txt
index 97e40db5e..75fde9a5f 100644
--- a/doc/log.txt
+++ b/doc/log.txt
@@ -1,4 +1,7 @@
+* 1.9.3-dev, ????-??-??
+ - Set macros for available SIMD instructions in build.h
+
* 1.9.2, 2009-11-03
- Add SIMD version of XTEA
- Support both SSE2 and AltiVec SIMD for Serpent and XTEA
diff --git a/readme.txt b/readme.txt
index a192a1e1e..44f1f471c 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,4 +1,4 @@
-Botan 1.9.2, 2009-11-03
+Botan 1.9.3-dev, ????-??-??
Botan is a C++ class library for performing a wide variety of
cryptographic operations.
diff --git a/src/block/aes_intel/aes_intel.cpp b/src/block/aes_intel/aes_intel.cpp
new file mode 100644
index 000000000..057728e72
--- /dev/null
+++ b/src/block/aes_intel/aes_intel.cpp
@@ -0,0 +1,177 @@
+/**
+* AES
+* (C) 1999-2009 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/aes_intel.h>
+#include <wmmintrin.h>
+
+namespace Botan {
+
+namespace {
+
+__m128i aes_128_key_expansion(__m128i key, __m128i key_with_rcon)
+   {
+   // Copy the highest 32-bit word of the keygenassist output to all lanes
+   const __m128i rcon_word =
+      _mm_shuffle_epi32(key_with_rcon, _MM_SHUFFLE(3,3,3,3));
+
+   // Three xor-with-self-shifted-by-4-bytes steps: afterwards each word
+   // is the XOR of itself and every lower word of the previous round key
+   for(int step = 0; step != 3; ++step)
+      key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+
+   return _mm_xor_si128(key, rcon_word);
+   }
+
+}
+
+/**
+* AES Encryption
+*
+* Encrypts `blocks` consecutive 128-bit blocks from in[] into out[]
+* using the round keys stored in EK. Only unaligned loads and stores
+* are used for the input, output and key data.
+*/
+void AES_128_Intel::encrypt_n(const byte in[], byte out[], u32bit blocks) const
+   {
+   const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+   __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+   const __m128i* key_mm = reinterpret_cast<const __m128i*>(&EK[0]);
+
+   // Load all 11 round keys once, outside the per-block loop
+   const __m128i K0 = _mm_loadu_si128(key_mm);
+   const __m128i K1 = _mm_loadu_si128(key_mm + 1);
+   const __m128i K2 = _mm_loadu_si128(key_mm + 2);
+   const __m128i K3 = _mm_loadu_si128(key_mm + 3);
+   const __m128i K4 = _mm_loadu_si128(key_mm + 4);
+   const __m128i K5 = _mm_loadu_si128(key_mm + 5);
+   const __m128i K6 = _mm_loadu_si128(key_mm + 6);
+   const __m128i K7 = _mm_loadu_si128(key_mm + 7);
+   const __m128i K8 = _mm_loadu_si128(key_mm + 8);
+   const __m128i K9 = _mm_loadu_si128(key_mm + 9);
+   const __m128i K10 = _mm_loadu_si128(key_mm + 10);
+
+   // Blocks are addressed as in_mm + i / out_mm + i, so the byte
+   // pointers in/out do not need to be advanced inside the loop
+   for(u32bit i = 0; i != blocks; ++i)
+      {
+      __m128i B = _mm_loadu_si128(in_mm + i);
+
+      // Initial key whitening
+      B = _mm_xor_si128(B, K0);
+
+      // Nine full rounds followed by the final round
+      B = _mm_aesenc_si128(B, K1);
+      B = _mm_aesenc_si128(B, K2);
+      B = _mm_aesenc_si128(B, K3);
+      B = _mm_aesenc_si128(B, K4);
+      B = _mm_aesenc_si128(B, K5);
+      B = _mm_aesenc_si128(B, K6);
+      B = _mm_aesenc_si128(B, K7);
+      B = _mm_aesenc_si128(B, K8);
+      B = _mm_aesenc_si128(B, K9);
+      B = _mm_aesenclast_si128(B, K10);
+
+      _mm_storeu_si128(out_mm + i, B);
+      }
+   }
+
+/**
+* AES Decryption
+*
+* Decrypts `blocks` consecutive 128-bit blocks from in[] into out[]
+* using the round keys stored in DK (prepared by key_schedule). Only
+* unaligned loads and stores are used for the input, output and key data.
+*/
+void AES_128_Intel::decrypt_n(const byte in[], byte out[], u32bit blocks) const
+   {
+   const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+   __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+   const __m128i* key_mm = reinterpret_cast<const __m128i*>(&DK[0]);
+
+   // Load all 11 round keys once, outside the per-block loop
+   const __m128i K0 = _mm_loadu_si128(key_mm);
+   const __m128i K1 = _mm_loadu_si128(key_mm + 1);
+   const __m128i K2 = _mm_loadu_si128(key_mm + 2);
+   const __m128i K3 = _mm_loadu_si128(key_mm + 3);
+   const __m128i K4 = _mm_loadu_si128(key_mm + 4);
+   const __m128i K5 = _mm_loadu_si128(key_mm + 5);
+   const __m128i K6 = _mm_loadu_si128(key_mm + 6);
+   const __m128i K7 = _mm_loadu_si128(key_mm + 7);
+   const __m128i K8 = _mm_loadu_si128(key_mm + 8);
+   const __m128i K9 = _mm_loadu_si128(key_mm + 9);
+   const __m128i K10 = _mm_loadu_si128(key_mm + 10);
+
+   // Blocks are addressed as in_mm + i / out_mm + i, so the byte
+   // pointers in/out do not need to be advanced inside the loop
+   for(u32bit i = 0; i != blocks; ++i)
+      {
+      __m128i B = _mm_loadu_si128(in_mm + i);
+
+      // Initial key whitening
+      B = _mm_xor_si128(B, K0);
+
+      // Nine full inverse rounds followed by the final inverse round
+      B = _mm_aesdec_si128(B, K1);
+      B = _mm_aesdec_si128(B, K2);
+      B = _mm_aesdec_si128(B, K3);
+      B = _mm_aesdec_si128(B, K4);
+      B = _mm_aesdec_si128(B, K5);
+      B = _mm_aesdec_si128(B, K6);
+      B = _mm_aesdec_si128(B, K7);
+      B = _mm_aesdec_si128(B, K8);
+      B = _mm_aesdec_si128(B, K9);
+      B = _mm_aesdeclast_si128(B, K10);
+
+      _mm_storeu_si128(out_mm + i, B);
+      }
+   }
+
+/**
+* AES Key Schedule
+*
+* Expands the 128-bit key at key[] into 11 encryption round keys (EK)
+* and derives the 11 decryption round keys (DK) from them. `length` is
+* not consulted here; 16 bytes are always read from key[].
+*/
+void AES_128_Intel::key_schedule(const byte key[], u32bit length)
+   {
+   // _mm_aeskeygenassist_si128 takes its round constant as an immediate,
+   // so the expansion step is wrapped in a macro instead of a function
+   #define AES_128_key_exp(K, RCON) \
+      aes_128_key_expansion(K, _mm_aeskeygenassist_si128(K, RCON))
+
+   const __m128i K0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key));
+   const __m128i K1 = AES_128_key_exp(K0, 0x01);
+   const __m128i K2 = AES_128_key_exp(K1, 0x02);
+   const __m128i K3 = AES_128_key_exp(K2, 0x04);
+   const __m128i K4 = AES_128_key_exp(K3, 0x08);
+   const __m128i K5 = AES_128_key_exp(K4, 0x10);
+   const __m128i K6 = AES_128_key_exp(K5, 0x20);
+   const __m128i K7 = AES_128_key_exp(K6, 0x40);
+   const __m128i K8 = AES_128_key_exp(K7, 0x80);
+   const __m128i K9 = AES_128_key_exp(K8, 0x1B);
+   const __m128i K10 = AES_128_key_exp(K9, 0x36);
+
+   // Keep the helper macro from leaking into the rest of the file
+   #undef AES_128_key_exp
+
+   __m128i* EK_mm = reinterpret_cast<__m128i*>(&EK[0]);
+   _mm_storeu_si128(EK_mm     , K0);
+   _mm_storeu_si128(EK_mm +  1, K1);
+   _mm_storeu_si128(EK_mm +  2, K2);
+   _mm_storeu_si128(EK_mm +  3, K3);
+   _mm_storeu_si128(EK_mm +  4, K4);
+   _mm_storeu_si128(EK_mm +  5, K5);
+   _mm_storeu_si128(EK_mm +  6, K6);
+   _mm_storeu_si128(EK_mm +  7, K7);
+   _mm_storeu_si128(EK_mm +  8, K8);
+   _mm_storeu_si128(EK_mm +  9, K9);
+   _mm_storeu_si128(EK_mm + 10, K10);
+
+   // Now generate decryption keys: the encryption round keys in reverse
+   // order, with _mm_aesimc_si128 applied to all but the first and last
+
+   __m128i* DK_mm = reinterpret_cast<__m128i*>(&DK[0]);
+   _mm_storeu_si128(DK_mm     , K10);
+   _mm_storeu_si128(DK_mm +  1, _mm_aesimc_si128(K9));
+   _mm_storeu_si128(DK_mm +  2, _mm_aesimc_si128(K8));
+   _mm_storeu_si128(DK_mm +  3, _mm_aesimc_si128(K7));
+   _mm_storeu_si128(DK_mm +  4, _mm_aesimc_si128(K6));
+   _mm_storeu_si128(DK_mm +  5, _mm_aesimc_si128(K5));
+   _mm_storeu_si128(DK_mm +  6, _mm_aesimc_si128(K4));
+   _mm_storeu_si128(DK_mm +  7, _mm_aesimc_si128(K3));
+   _mm_storeu_si128(DK_mm +  8, _mm_aesimc_si128(K2));
+   _mm_storeu_si128(DK_mm +  9, _mm_aesimc_si128(K1));
+   _mm_storeu_si128(DK_mm + 10, K0);
+   }
+
+/**
+* Clear memory of sensitive data: both the encryption (EK) and
+* decryption (DK) round key buffers
+*/
+void AES_128_Intel::clear()
+ {
+ EK.clear();
+ DK.clear();
+ }
+
+}
diff --git a/src/block/aes_intel/aes_intel.h b/src/block/aes_intel/aes_intel.h
new file mode 100644
index 000000000..90270939c
--- /dev/null
+++ b/src/block/aes_intel/aes_intel.h
@@ -0,0 +1,34 @@
+/**
+* AES using Intel's AES instructions
+* (C) 1999-2009 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_AES_INTEL_H__
+#define BOTAN_AES_INTEL_H__
+
+#include <botan/block_cipher.h>
+
+namespace Botan {
+
+/**
+* AES-128 block cipher using Intel's AES instructions
+*/
+class BOTAN_DLL AES_128_Intel : public BlockCipher
+ {
+ public:
+ // Encrypt / decrypt `blocks` consecutive blocks from in[] into out[]
+ void encrypt_n(const byte in[], byte out[], u32bit blocks) const;
+ void decrypt_n(const byte in[], byte out[], u32bit blocks) const;
+
+ // Clear the stored round keys (EK and DK)
+ void clear();
+ // Reports the plain algorithm name "AES-128"
+ std::string name() const { return "AES-128"; }
+ // Returns a new default-constructed object; no key material is copied
+ BlockCipher* clone() const { return new AES_128_Intel; }
+
+ // NOTE(review): presumably (block size, key length) in bytes - confirm
+ // against the BlockCipher constructor
+ AES_128_Intel() : BlockCipher(16, 16) { }
+ private:
+ // Fills EK and DK from the supplied key
+ void key_schedule(const byte[], u32bit);
+
+ // 44 32-bit words each, i.e. room for 11 128-bit round keys
+ SecureBuffer<u32bit, 44> EK, DK;
+ };
+
+}
+
+#endif
diff --git a/src/block/aes_intel/info.txt b/src/block/aes_intel/info.txt
new file mode 100644
index 000000000..6e67a6ed9
--- /dev/null
+++ b/src/block/aes_intel/info.txt
@@ -0,0 +1,9 @@
+define AES_INTEL
+
+load_on auto
+
+need_isa aes_ni
+
+<requires>
+aes_isa_eng
+</requires>
diff --git a/src/build-data/arch/alpha.txt b/src/build-data/arch/alpha.txt
index b1d939ed1..2bf72edef 100644
--- a/src/build-data/arch/alpha.txt
+++ b/src/build-data/arch/alpha.txt
@@ -1,4 +1,3 @@
-default_submodel alpha-ev4
endian little
<aliases>
diff --git a/src/build-data/arch/amd64.txt b/src/build-data/arch/amd64.txt
index 96da0e3a9..6c1c2a7e4 100644
--- a/src/build-data/arch/amd64.txt
+++ b/src/build-data/arch/amd64.txt
@@ -1,5 +1,3 @@
-default_submodel opteron
-
endian little
unaligned ok
@@ -9,9 +7,12 @@ x86_64 # for RPM
</aliases>
<submodels>
-opteron
+k8
+k10
em64t
core2
+nehalem
+westmere
</submodels>
<submodel_aliases>
@@ -19,7 +20,15 @@ core2duo -> core2
intelcore2 -> core2
intelcore2duo -> core2
-amdopteron -> opteron
-athlon64 -> opteron
-k8 -> opteron
+sledgehammer -> k8
+opteron -> k8
+amdopteron -> k8
+athlon64 -> k8
+barcelona -> k10
</submodel_aliases>
+
+<isa_extn>
+sse2:all
+ssse3:core2,nehalem,westmere
+aes_ni:westmere
+</isa_extn>
diff --git a/src/build-data/arch/arm.txt b/src/build-data/arch/arm.txt
index 5f05d4cad..77f15b1d9 100644
--- a/src/build-data/arch/arm.txt
+++ b/src/build-data/arch/arm.txt
@@ -1,5 +1,3 @@
-default_submodel arm2
-
<submodels>
arm2
arm3
@@ -11,6 +9,8 @@ strongarm
strongarm110
strongarm1100
xscale
+cortex-a8
+cortex-a9
</submodels>
<submodel_aliases>
@@ -19,3 +19,7 @@ sa1100 -> strongarm1100
strongarm1110 -> strongarm1100
armv5tel -> xscale
</submodel_aliases>
+
+<isa_extn>
+neon:cortex-a8,cortex-a9
+</isa_extn>
diff --git a/src/build-data/arch/hitachi-sh.txt b/src/build-data/arch/hitachi-sh.txt
index 85a741f59..bab84b48f 100644
--- a/src/build-data/arch/hitachi-sh.txt
+++ b/src/build-data/arch/hitachi-sh.txt
@@ -1,5 +1,3 @@
-default_submodel hitachi-sh1
-
<submodels>
hitachi-sh1
hitachi-sh2
diff --git a/src/build-data/arch/hppa.txt b/src/build-data/arch/hppa.txt
index 67bca263d..0f7d90c68 100644
--- a/src/build-data/arch/hppa.txt
+++ b/src/build-data/arch/hppa.txt
@@ -1,5 +1,3 @@
-default_submodel hppa1.0
-
<aliases>
hp-pa
parisc
diff --git a/src/build-data/arch/ia32.txt b/src/build-data/arch/ia32.txt
index 0fe665e68..40066851d 100644
--- a/src/build-data/arch/ia32.txt
+++ b/src/build-data/arch/ia32.txt
@@ -1,5 +1,3 @@
-default_submodel i386
-
endian little
unaligned ok
@@ -22,6 +20,7 @@ pentium-m
prescott
k6
athlon
+atom
</submodels>
<submodel_aliases>
@@ -56,3 +55,8 @@ intelcput2500 -> prescott
intelcput2600 -> prescott
intelcput2700 -> prescott
</submodel_aliases>
+
+<isa_extn>
+sse2:pentium4,prescott,pentium-m,atom
+ssse3:atom
+</isa_extn>
diff --git a/src/build-data/arch/ia64.txt b/src/build-data/arch/ia64.txt
index 65309f0ff..55967d5ab 100644
--- a/src/build-data/arch/ia64.txt
+++ b/src/build-data/arch/ia64.txt
@@ -1,13 +1,14 @@
-# This is safe: only affects tuning, not ISA
-default_submodel itanium2
-
<aliases>
-merced
itanium
+itanic
</aliases>
<submodels>
-itanium1
-itanium2
+merced
mckinley
+montvale
</submodels>
+
+<submodel_aliases>
+itanium2 -> mckinley
+</submodel_aliases>
diff --git a/src/build-data/arch/m68k.txt b/src/build-data/arch/m68k.txt
index 3a8b5e8b3..691c45b92 100644
--- a/src/build-data/arch/m68k.txt
+++ b/src/build-data/arch/m68k.txt
@@ -1,5 +1,3 @@
-default_submodel 68020
-
endian big
# Except for Coldfire
diff --git a/src/build-data/arch/mips32.txt b/src/build-data/arch/mips32.txt
index ec9d4b5bf..b69abdacb 100644
--- a/src/build-data/arch/mips32.txt
+++ b/src/build-data/arch/mips32.txt
@@ -1,5 +1,3 @@
-default_submodel r3000
-
<aliases>
mips
mipsel # For Debian
diff --git a/src/build-data/arch/mips64.txt b/src/build-data/arch/mips64.txt
index 666ba7e18..a2fd5849b 100644
--- a/src/build-data/arch/mips64.txt
+++ b/src/build-data/arch/mips64.txt
@@ -1,5 +1,3 @@
-default_submodel r4400
-
<submodels>
r4000
r4100
diff --git a/src/build-data/arch/ppc.txt b/src/build-data/arch/ppc.txt
index 254643fdd..811eb53e7 100644
--- a/src/build-data/arch/ppc.txt
+++ b/src/build-data/arch/ppc.txt
@@ -1,8 +1,6 @@
endian big
unaligned ok
-default_submodel ppc604
-
<aliases>
powerpc
</aliases>
@@ -21,3 +19,7 @@ ppc750
ppc7400
ppc7450
</submodels>
+
+<isa_extn>
+altivec:ppc7400,ppc7450
+</isa_extn>
diff --git a/src/build-data/arch/ppc64.txt b/src/build-data/arch/ppc64.txt
index f044ba98d..b99aa9b8b 100644
--- a/src/build-data/arch/ppc64.txt
+++ b/src/build-data/arch/ppc64.txt
@@ -1,7 +1,5 @@
endian big
-default_submodel power4
-
<aliases>
powerpc64
</aliases>
@@ -16,9 +14,15 @@ ppc970
power3
power4
power5
+power6
+power7
cellppu
</submodels>
<submodel_aliases>
cellbroadbandengine -> cellppu
</submodel_aliases>
+
+<isa_extn>
+altivec:cellppu,ppc970,power6,power7
+</isa_extn>
diff --git a/src/build-data/arch/s390.txt b/src/build-data/arch/s390.txt
index 8024a4315..6e9f99f12 100644
--- a/src/build-data/arch/s390.txt
+++ b/src/build-data/arch/s390.txt
@@ -1,5 +1,3 @@
-default_submodel s390
-
endian big
unaligned ok
diff --git a/src/build-data/arch/s390x.txt b/src/build-data/arch/s390x.txt
index 00daab8b4..6f4271607 100644
--- a/src/build-data/arch/s390x.txt
+++ b/src/build-data/arch/s390x.txt
@@ -1,5 +1,3 @@
-default_submodel s390x
-
endian big
unaligned ok
diff --git a/src/build-data/arch/sparc32.txt b/src/build-data/arch/sparc32.txt
index 57b19c519..ae75a2a4b 100644
--- a/src/build-data/arch/sparc32.txt
+++ b/src/build-data/arch/sparc32.txt
@@ -1,7 +1,3 @@
-# V7 doesn't have integer multiply, so it will be bitterly slow for some things
-# (especially BigInt). Also, it's fairly rare nowadays, so we default to V8.
-default_submodel sparc32-v8
-
endian big
<aliases>
diff --git a/src/build-data/arch/sparc64.txt b/src/build-data/arch/sparc64.txt
index e308055fa..8f62e66f6 100644
--- a/src/build-data/arch/sparc64.txt
+++ b/src/build-data/arch/sparc64.txt
@@ -1,5 +1,3 @@
-default_submodel sparc64-ultra
-
<submodels>
sparc64-ultra
sparc64-ultra2
diff --git a/src/build-data/cc/gcc.txt b/src/build-data/cc/gcc.txt
index 370bb84d7..f25f81048 100644
--- a/src/build-data/cc/gcc.txt
+++ b/src/build-data/cc/gcc.txt
@@ -52,6 +52,9 @@ sparc64-ultra3 -> "-mcpu=v9 -mtune=ultrasparc3"
em64t -> "-march=nocona -momit-leaf-frame-pointer"
cellppu -> ""
+nehalem -> "-march=core2 -msse4.1 -msse4.2"
+westmere -> "-march=core2 -maes -msse4.1 -msse4.2"
+
# Default family options (SUBMODEL is substituted with the real submodel)
# Anything after the quotes is what should be *removed* from the submodel name
# before it's put into SUBMODEL.
diff --git a/src/engine/aes_isa_eng/aes_isa_engine.cpp b/src/engine/aes_isa_eng/aes_isa_engine.cpp
new file mode 100644
index 000000000..fa3b4ceab
--- /dev/null
+++ b/src/engine/aes_isa_eng/aes_isa_engine.cpp
@@ -0,0 +1,54 @@
+/*
+* Engine for AES instructions
+* (C) 2009 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/aes_isa_engine.h>
+#include <botan/cpuid.h>
+
+#if defined(BOTAN_HAS_AES_INTEL)
+ #include <botan/aes_intel.h>
+#endif
+
+#if defined(BOTAN_HAS_AES_VIA)
+ #include <botan/aes_via.h>
+#endif
+
+namespace Botan {
+
+/*
+* Return a newly allocated AES implementation backed by CPU AES
+* instructions for the requested algorithm name, or 0 if no compiled-in
+* implementation matches or the running CPU lacks the needed support.
+* The Algorithm_Factory argument is not used.
+*/
+BlockCipher*
+AES_ISA_Engine::find_block_cipher(const SCAN_Name& request,
+ Algorithm_Factory&) const
+ {
+#if defined(BOTAN_HAS_AES_INTEL)
+ // Intel AES instructions are tried first; availability is checked at
+ // runtime through CPUID
+ if(CPUID::has_intel_aes())
+ {
+ if(request.algo_name() == "AES-128")
+ return new AES_128_Intel;
+ // Only AES-128 is handled here; the 192/256-bit cases are disabled
+ /*
+ if(request.algo_name() == "AES-192")
+ return new AES_192_Intel;
+ if(request.algo_name() == "AES-256")
+ return new AES_256_Intel;
+ */
+ }
+#endif
+
+#if defined(BOTAN_HAS_AES_VIA)
+ // Reached when the Intel path is absent or did not return a cipher
+ if(CPUID::has_via_aes())
+ {
+ if(request.algo_name() == "AES-128")
+ return new AES_128_VIA;
+ if(request.algo_name() == "AES-192")
+ return new AES_192_VIA;
+ if(request.algo_name() == "AES-256")
+ return new AES_256_VIA;
+ }
+#endif
+
+ // Nothing suitable found
+ return 0;
+ }
+
+}
diff --git a/src/engine/aes_isa_eng/aes_isa_engine.h b/src/engine/aes_isa_eng/aes_isa_engine.h
new file mode 100644
index 000000000..602a114a9
--- /dev/null
+++ b/src/engine/aes_isa_eng/aes_isa_engine.h
@@ -0,0 +1,26 @@
+/**
+* Engine for AES instructions
+* (C) 2009 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_AES_ISA_ENGINE_H__
+#define BOTAN_AES_ISA_ENGINE_H__
+
+#include <botan/engine.h>
+
+namespace Botan {
+
+/**
+* Engine that offers block ciphers implemented with AES instructions
+*/
+class BOTAN_DLL AES_ISA_Engine : public Engine
+ {
+ public:
+ // Name under which this engine identifies itself
+ std::string provider_name() const { return "aes_isa"; }
+ private:
+ // Look up a block cipher by name; defined in aes_isa_engine.cpp
+ BlockCipher* find_block_cipher(const SCAN_Name&,
+ Algorithm_Factory&) const;
+ };
+
+}
+
+#endif
diff --git a/src/engine/aes_isa_eng/info.txt b/src/engine/aes_isa_eng/info.txt
new file mode 100644
index 000000000..c0695aaf3
--- /dev/null
+++ b/src/engine/aes_isa_eng/info.txt
@@ -0,0 +1,3 @@
+define ENGINE_AES_ISA
+
+load_on dep
diff --git a/src/hash/sha1_sse2/info.txt b/src/hash/sha1_sse2/info.txt
index ee61076b4..7a380753d 100644
--- a/src/hash/sha1_sse2/info.txt
+++ b/src/hash/sha1_sse2/info.txt
@@ -1,19 +1,8 @@
define SHA1_SSE2
+need_isa sse2
+
<requires>
sha1
simd_engine
</requires>
-
-<arch>
-pentium-m
-pentium4
-prescott
-amd64
-</arch>
-
-<cc>
-gcc
-icc
-msvc
-</cc>
diff --git a/src/libstate/libstate.cpp b/src/libstate/libstate.cpp
index c78bce62d..8b039a97a 100644
--- a/src/libstate/libstate.cpp
+++ b/src/libstate/libstate.cpp
@@ -37,6 +37,10 @@
#include <botan/eng_amd64.h>
#endif
+#if defined(BOTAN_HAS_ENGINE_AES_ISA)
+ #include <botan/aes_isa_engine.h>
+#endif
+
#if defined(BOTAN_HAS_ENGINE_SIMD)
#include <botan/simd_engine.h>
#endif
@@ -288,6 +292,10 @@ void Library_State::initialize(bool thread_safe)
engines.push_back(new OpenSSL_Engine);
#endif
+#if defined(BOTAN_HAS_ENGINE_AES_ISA)
+ engines.push_back(new AES_ISA_Engine);
+#endif
+
#if defined(BOTAN_HAS_ENGINE_SIMD)
engines.push_back(new SIMD_Engine);
#endif
diff --git a/src/timer/cpu_counter/info.txt b/src/timer/cpu_counter/info.txt
index d95e0fec5..2ab1343bc 100644
--- a/src/timer/cpu_counter/info.txt
+++ b/src/timer/cpu_counter/info.txt
@@ -12,10 +12,12 @@ gcc
</cc>
<arch>
+
# RDTSC: Pentium and up
i586
i686
athlon
+pentium3
pentium4
pentium-m
amd64
diff --git a/src/utils/loadstor.h b/src/utils/loadstor.h
index 8f430f36c..489a789f4 100644
--- a/src/utils/loadstor.h
+++ b/src/utils/loadstor.h
@@ -14,7 +14,7 @@
#include <botan/rotate.h>
#include <botan/prefetch.h>
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
#if defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
@@ -101,7 +101,7 @@ inline T load_le(const byte in[], u32bit off)
template<>
inline u16bit load_be<u16bit>(const byte in[], u32bit off)
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u16bit*>(in) + off));
#else
in += off * sizeof(u16bit);
@@ -112,7 +112,7 @@ inline u16bit load_be<u16bit>(const byte in[], u32bit off)
template<>
inline u16bit load_le<u16bit>(const byte in[], u32bit off)
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u16bit*>(in) + off));
#else
in += off * sizeof(u16bit);
@@ -123,7 +123,7 @@ inline u16bit load_le<u16bit>(const byte in[], u32bit off)
template<>
inline u32bit load_be<u32bit>(const byte in[], u32bit off)
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u32bit*>(in) + off));
#else
in += off * sizeof(u32bit);
@@ -134,7 +134,7 @@ inline u32bit load_be<u32bit>(const byte in[], u32bit off)
template<>
inline u32bit load_le<u32bit>(const byte in[], u32bit off)
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u32bit*>(in) + off));
#else
in += off * sizeof(u32bit);
@@ -145,7 +145,7 @@ inline u32bit load_le<u32bit>(const byte in[], u32bit off)
template<>
inline u64bit load_be<u64bit>(const byte in[], u32bit off)
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
return BOTAN_ENDIAN_N2B(*(reinterpret_cast<const u64bit*>(in) + off));
#else
in += off * sizeof(u64bit);
@@ -157,7 +157,7 @@ inline u64bit load_be<u64bit>(const byte in[], u32bit off)
template<>
inline u64bit load_le<u64bit>(const byte in[], u32bit off)
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
return BOTAN_ENDIAN_N2L(*(reinterpret_cast<const u64bit*>(in) + off));
#else
in += off * sizeof(u64bit);
@@ -281,7 +281,7 @@ inline void load_be(T out[],
*/
inline void store_be(u16bit in, byte out[2])
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
*reinterpret_cast<u16bit*>(out) = BOTAN_ENDIAN_B2N(in);
#else
out[0] = get_byte(0, in);
@@ -291,7 +291,7 @@ inline void store_be(u16bit in, byte out[2])
inline void store_le(u16bit in, byte out[2])
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
*reinterpret_cast<u16bit*>(out) = BOTAN_ENDIAN_L2N(in);
#else
out[0] = get_byte(1, in);
@@ -301,7 +301,7 @@ inline void store_le(u16bit in, byte out[2])
inline void store_be(u32bit in, byte out[4])
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
*reinterpret_cast<u32bit*>(out) = BOTAN_ENDIAN_B2N(in);
#else
out[0] = get_byte(0, in);
@@ -313,7 +313,7 @@ inline void store_be(u32bit in, byte out[4])
inline void store_le(u32bit in, byte out[4])
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
*reinterpret_cast<u32bit*>(out) = BOTAN_ENDIAN_L2N(in);
#else
out[0] = get_byte(3, in);
@@ -325,7 +325,7 @@ inline void store_le(u32bit in, byte out[4])
inline void store_be(u64bit in, byte out[8])
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
*reinterpret_cast<u64bit*>(out) = BOTAN_ENDIAN_B2N(in);
#else
out[0] = get_byte(0, in);
@@ -341,7 +341,7 @@ inline void store_be(u64bit in, byte out[8])
inline void store_le(u64bit in, byte out[8])
{
-#if BOTAN_TARGET_UNALIGNED_LOADSTOR_OK
+#if BOTAN_TARGET_UNALIGNED_MEMORY_ACCESS_OK
*reinterpret_cast<u64bit*>(out) = BOTAN_ENDIAN_L2N(in);
#else
out[0] = get_byte(7, in);
diff --git a/src/utils/simd_32/info.txt b/src/utils/simd_32/info.txt
index 64707c1e4..883096a5d 100644
--- a/src/utils/simd_32/info.txt
+++ b/src/utils/simd_32/info.txt
@@ -1,16 +1,3 @@
define SIMD_32
load_on always
-
-<arch>
-pentium-m
-pentium4
-prescott
-amd64
-</arch>
-
-<cc>
-gcc
-icc
-msvc
-</cc>
diff --git a/src/utils/xor_buf.h b/src/utils/xor_buf.h
index 39781f017..39c4a493d 100644
--- a/src/utils/xor_buf.h
+++ b/src/utils/xor_buf.h
@@ -22,7 +22,7 @@ inline void xor_buf(byte out[], const byte in[], u32bit length)
{
while(length >= 8)
{
-#if BOTAN_UNALIGNED_LOADSTOR_OK
+#if BOTAN_UNALIGNED_MEMORY_ACCESS_OK
*reinterpret_cast<u64bit*>(out) ^= *reinterpret_cast<const u64bit*>(in);
#else
out[0] ^= in[0]; out[1] ^= in[1];
@@ -51,7 +51,7 @@ inline void xor_buf(byte out[],
{
while(length >= 8)
{
-#if BOTAN_UNALIGNED_LOADSTOR_OK
+#if BOTAN_UNALIGNED_MEMORY_ACCESS_OK
*reinterpret_cast<u64bit*>(out) =
*reinterpret_cast<const u64bit*>(in) ^
*reinterpret_cast<const u64bit*>(in2);