From 093f98e42a3ba6d51a6c676070090dd06cc39bc7 Mon Sep 17 00:00:00 2001 From: lloyd Date: Tue, 29 Sep 2009 18:37:08 +0000 Subject: Add some basic prefetching support (only supported with GNU C++ or things that claim to be by defining __GNUG__ (such as Intel C++)) in new utils header prefetch.h --- src/utils/prefetch.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 src/utils/prefetch.h (limited to 'src/utils/prefetch.h') diff --git a/src/utils/prefetch.h b/src/utils/prefetch.h new file mode 100644 index 000000000..100829dce --- /dev/null +++ b/src/utils/prefetch.h @@ -0,0 +1,39 @@ +/* +* Prefetching Operations +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_PREFETCH_H__ +#define BOTAN_PREFETCH_H__ + +#include <botan/cpuid.h> + +namespace Botan { + +inline void prefetch_readonly(const void* addr_void, u32bit length) + { +#if defined(__GNUG__) + const byte* addr = static_cast<const byte*>(addr_void); + const u32bit cl_size = CPUID::cache_line_size(); + + for(u32bit i = 0; i <= length; i += cl_size) + __builtin_prefetch(addr + i, 0); +#endif + } + +inline void prefetch_readwrite(const void* addr_void, u32bit length) + { +#if defined(__GNUG__) + const byte* addr = static_cast<const byte*>(addr_void); + const u32bit cl_size = CPUID::cache_line_size(); + + for(u32bit i = 0; i <= length; i += cl_size) + __builtin_prefetch(addr + i, 1); +#endif + } + +} + +#endif -- cgit v1.2.3 From 096ed3cfa340aa7c917da7a92ddade6dd69ab758 Mon Sep 17 00:00:00 2001 From: lloyd Date: Tue, 29 Sep 2009 19:04:53 +0000 Subject: Change the prefetching interface; move to PREFETCH namespace, and add a helper function for fetching both inputs and outputs of block ciphers.
--- src/utils/prefetch.h | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'src/utils/prefetch.h') diff --git a/src/utils/prefetch.h b/src/utils/prefetch.h index 100829dce..72b6de689 100644 --- a/src/utils/prefetch.h +++ b/src/utils/prefetch.h @@ -12,28 +12,44 @@ namespace Botan { -inline void prefetch_readonly(const void* addr_void, u32bit length) +namespace PREFETCH { + +template<typename T> +inline void readonly(const T* addr, u32bit length) { #if defined(__GNUG__) - const byte* addr = static_cast<const byte*>(addr_void); - const u32bit cl_size = CPUID::cache_line_size(); + const u32bit Ts_per_cache_line = CPUID::cache_line_size() / sizeof(T); - for(u32bit i = 0; i <= length; i += cl_size) + for(u32bit i = 0; i <= length; i += Ts_per_cache_line) __builtin_prefetch(addr + i, 0); #endif } -inline void prefetch_readwrite(const void* addr_void, u32bit length) +template<typename T> +inline void readwrite(const T* addr, u32bit length) { #if defined(__GNUG__) - const byte* addr = static_cast<const byte*>(addr_void); - const u32bit cl_size = CPUID::cache_line_size(); + const u32bit Ts_per_cache_line = CPUID::cache_line_size() / sizeof(T); - for(u32bit i = 0; i <= length; i += cl_size) - __builtin_prefetch(addr + i, 1); + for(u32bit i = 0; i <= length; i += Ts_per_cache_line) + __builtin_prefetch(addr + i, 0); #endif } +inline void cipher_fetch(const byte* in_block, + const byte* out_block, + u32bit blocks, + u32bit block_size) + { + // Only prefetch input specifically if in != out + if(in_block != out_block) + readonly(in_block, blocks * block_size); + + readwrite(out_block, blocks * block_size); + } + +} + } #endif -- cgit v1.2.3 From 59b4eb0bd2d1b9d65c3921f5205a012f1a98fdf8 Mon Sep 17 00:00:00 2001 From: lloyd Date: Wed, 30 Sep 2009 01:29:06 +0000 Subject: Disable prefetch in AES for now. Problem: with iterative modes like CBC, the prefetch is called for each block of input, and so a total of (4096+256)/64 = 68 prefetches are executed for each block.
This reduces performance of iterative modes dramatically. I'm not sure what the right approach for dealing with this is. --- src/block/aes/aes.cpp | 8 -------- src/utils/prefetch.h | 12 ------------ 2 files changed, 20 deletions(-) (limited to 'src/utils/prefetch.h') diff --git a/src/block/aes/aes.cpp b/src/block/aes/aes.cpp index 8ef9cd8fe..34698ae7f 100644 --- a/src/block/aes/aes.cpp +++ b/src/block/aes/aes.cpp @@ -20,10 +20,6 @@ void AES::encrypt_n(const byte in[], byte out[], u32bit blocks) const const u32bit* TE2 = TE + 512; const u32bit* TE3 = TE + 768; - PREFETCH::readonly(TE, 1024); - PREFETCH::readonly(SE, 256); - PREFETCH::cipher_fetch(in, out, blocks, this->BLOCK_SIZE); - for(u32bit i = 0; i != blocks; ++i) { u32bit T0 = load_be<u32bit>(in, 0) ^ EK[0]; @@ -118,10 +114,6 @@ void AES::decrypt_n(const byte in[], byte out[], u32bit blocks) const const u32bit* TD2 = TD + 512; const u32bit* TD3 = TD + 768; - PREFETCH::readonly(TD, 1024); - PREFETCH::readonly(SD, 256); - PREFETCH::cipher_fetch(in, out, blocks, this->BLOCK_SIZE); - for(u32bit i = 0; i != blocks; ++i) { u32bit T0 = load_be<u32bit>(in, 0) ^ DK[0]; diff --git a/src/utils/prefetch.h b/src/utils/prefetch.h index 72b6de689..7afdbda0a 100644 --- a/src/utils/prefetch.h +++ b/src/utils/prefetch.h @@ -36,18 +36,6 @@ inline void readwrite(const T* addr, u32bit length) #endif } -inline void cipher_fetch(const byte* in_block, - const byte* out_block, - u32bit blocks, - u32bit block_size) - { - // Only prefetch input specifically if in != out - if(in_block != out_block) - readonly(in_block, blocks * block_size); - - readwrite(out_block, blocks * block_size); - } - } } -- cgit v1.2.3