diff options
author | lloyd <[email protected]> | 2009-10-29 17:34:52 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2009-10-29 17:34:52 +0000 |
commit | 7462977b8e5eb95a81a6253dc6e6224334ad6ae9 (patch) | |
tree | d14b468f7a04b02635b11bda017cca56259275b5 /src/utils/loadstor.h | |
parent | 5553c5cf54563280a4ffc94baab7b94a83cb0000 (diff) |
Add a new looping load_be / load_le for loading large arrays at once, and
change some of the hash functions to use it as low-hanging fruit.
It could probably use further optimization (it just unrolls 4x currently), but
merely having it as syntax is good, as it allows optimizing many functions
at once (e.g. using SSE2 to do 4-way byteswaps).
Diffstat (limited to 'src/utils/loadstor.h')
-rw-r--r-- | src/utils/loadstor.h | 46 |
1 files changed, 46 insertions, 0 deletions
```diff
diff --git a/src/utils/loadstor.h b/src/utils/loadstor.h
index 6f91c2fa5..8f430f36c 100644
--- a/src/utils/loadstor.h
+++ b/src/utils/loadstor.h
@@ -199,6 +199,29 @@ inline void load_le(const byte in[],
    }
 
 template<typename T>
+inline void load_le(T out[],
+                    const byte in[],
+                    u32bit count)
+   {
+   const u32bit blocks = count - (count % 4);
+   const u32bit left = count - blocks;
+
+   for(u32bit i = 0; i != blocks; i += 4)
+      {
+      out[0] = load_le<T>(in, 0);
+      out[1] = load_le<T>(in, 1);
+      out[2] = load_le<T>(in, 2);
+      out[3] = load_le<T>(in, 3);
+
+      out += 4;
+      in += 4*sizeof(T);
+      }
+
+   for(u32bit i = 0; i != left; ++i)
+      out[i] = load_le<T>(in, i);
+   }
+
+template<typename T>
 inline void load_be(const byte in[], T& x0, T& x1)
    {
    x0 = load_be<T>(in, 0);
@@ -230,6 +253,29 @@ inline void load_be(const byte in[],
    x7 = load_be<T>(in, 7);
    }
 
+template<typename T>
+inline void load_be(T out[],
+                    const byte in[],
+                    u32bit count)
+   {
+   const u32bit blocks = count - (count % 4);
+   const u32bit left = count - blocks;
+
+   for(u32bit i = 0; i != blocks; i += 4)
+      {
+      out[0] = load_be<T>(in, 0);
+      out[1] = load_be<T>(in, 1);
+      out[2] = load_be<T>(in, 2);
+      out[3] = load_be<T>(in, 3);
+
+      out += 4;
+      in += 4*sizeof(T);
+      }
+
+   for(u32bit i = 0; i != left; ++i)
+      out[i] = load_be<T>(in, i);
+   }
+
 /*
 * Endian-Specific Word Storing Operations
 */
```