From fd1e0bcf053a1f154604ecd6dcd82f7e06a5726c Mon Sep 17 00:00:00 2001
From: lloyd
Date: Fri, 23 Oct 2009 01:25:52 +0000
Subject: Add new store_[l|b]e variants taking 8 values. Add new load options
 that are passed a number of variables by reference, setting them all at
 once. Will allow for batching operations (eg using SIMD operations to do
 128-bit wide bswaps) for future optimizations.

---
 src/utils/loadstor.h | 124 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 108 insertions(+), 16 deletions(-)

(limited to 'src/utils/loadstor.h')

diff --git a/src/utils/loadstor.h b/src/utils/loadstor.h
index 8c64deaee..6f91c2fa5 100644
--- a/src/utils/loadstor.h
+++ b/src/utils/loadstor.h
@@ -166,6 +166,70 @@ inline u64bit load_le(const byte in[], u32bit off)
 #endif
    }
 
+template<typename T>
+inline void load_le(const byte in[], T& x0, T& x1)
+   {
+   x0 = load_le<T>(in, 0);
+   x1 = load_le<T>(in, 1);
+   }
+
+template<typename T>
+inline void load_le(const byte in[],
+                    T& x0, T& x1, T& x2, T& x3)
+   {
+   x0 = load_le<T>(in, 0);
+   x1 = load_le<T>(in, 1);
+   x2 = load_le<T>(in, 2);
+   x3 = load_le<T>(in, 3);
+   }
+
+template<typename T>
+inline void load_le(const byte in[],
+                    T& x0, T& x1, T& x2, T& x3,
+                    T& x4, T& x5, T& x6, T& x7)
+   {
+   x0 = load_le<T>(in, 0);
+   x1 = load_le<T>(in, 1);
+   x2 = load_le<T>(in, 2);
+   x3 = load_le<T>(in, 3);
+   x4 = load_le<T>(in, 4);
+   x5 = load_le<T>(in, 5);
+   x6 = load_le<T>(in, 6);
+   x7 = load_le<T>(in, 7);
+   }
+
+template<typename T>
+inline void load_be(const byte in[], T& x0, T& x1)
+   {
+   x0 = load_be<T>(in, 0);
+   x1 = load_be<T>(in, 1);
+   }
+
+template<typename T>
+inline void load_be(const byte in[],
+                    T& x0, T& x1, T& x2, T& x3)
+   {
+   x0 = load_be<T>(in, 0);
+   x1 = load_be<T>(in, 1);
+   x2 = load_be<T>(in, 2);
+   x3 = load_be<T>(in, 3);
+   }
+
+template<typename T>
+inline void load_be(const byte in[],
+                    T& x0, T& x1, T& x2, T& x3,
+                    T& x4, T& x5, T& x6, T& x7)
+   {
+   x0 = load_be<T>(in, 0);
+   x1 = load_be<T>(in, 1);
+   x2 = load_be<T>(in, 2);
+   x3 = load_be<T>(in, 3);
+   x4 = load_be<T>(in, 4);
+   x5 = load_be<T>(in, 5);
+   x6 = load_be<T>(in, 6);
+   x7 = load_be<T>(in, 7);
+   }
+
 /*
 * Endian-Specific Word Storing Operations
 */
@@ -246,35 +310,63 @@ inline void store_le(u64bit in, byte out[8])
    }
 
 template<typename T>
-inline void store_le(byte out[], T a, T b)
+inline void store_le(byte out[], T x0, T x1)
+   {
+   store_le(x0, out + (0 * sizeof(T)));
+   store_le(x1, out + (1 * sizeof(T)));
+   }
+
+template<typename T>
+inline void store_be(byte out[], T x0, T x1)
+   {
+   store_be(x0, out + (0 * sizeof(T)));
+   store_be(x1, out + (1 * sizeof(T)));
+   }
+
+template<typename T>
+inline void store_le(byte out[], T x0, T x1, T x2, T x3)
    {
-   store_le(a, out + (0 * sizeof(T)));
-   store_le(b, out + (1 * sizeof(T)));
+   store_le(x0, out + (0 * sizeof(T)));
+   store_le(x1, out + (1 * sizeof(T)));
+   store_le(x2, out + (2 * sizeof(T)));
+   store_le(x3, out + (3 * sizeof(T)));
    }
 
 template<typename T>
-inline void store_be(byte out[], T a, T b)
+inline void store_be(byte out[], T x0, T x1, T x2, T x3)
    {
-   store_be(a, out + (0 * sizeof(T)));
-   store_be(b, out + (1 * sizeof(T)));
+   store_be(x0, out + (0 * sizeof(T)));
+   store_be(x1, out + (1 * sizeof(T)));
+   store_be(x2, out + (2 * sizeof(T)));
+   store_be(x3, out + (3 * sizeof(T)));
    }
 
 template<typename T>
-inline void store_le(byte out[], T a, T b, T c, T d)
+inline void store_le(byte out[], T x0, T x1, T x2, T x3,
+                                 T x4, T x5, T x6, T x7)
    {
-   store_le(a, out + (0 * sizeof(T)));
-   store_le(b, out + (1 * sizeof(T)));
-   store_le(c, out + (2 * sizeof(T)));
-   store_le(d, out + (3 * sizeof(T)));
+   store_le(x0, out + (0 * sizeof(T)));
+   store_le(x1, out + (1 * sizeof(T)));
+   store_le(x2, out + (2 * sizeof(T)));
+   store_le(x3, out + (3 * sizeof(T)));
+   store_le(x4, out + (4 * sizeof(T)));
+   store_le(x5, out + (5 * sizeof(T)));
+   store_le(x6, out + (6 * sizeof(T)));
+   store_le(x7, out + (7 * sizeof(T)));
    }
 
 template<typename T>
-inline void store_be(byte out[], T a, T b, T c, T d)
+inline void store_be(byte out[], T x0, T x1, T x2, T x3,
+                                 T x4, T x5, T x6, T x7)
    {
-   store_be(a, out + (0 * sizeof(T)));
-   store_be(b, out + (1 * sizeof(T)));
-   store_be(c, out + (2 * sizeof(T)));
-   store_be(d, out + (3 * sizeof(T)));
+   store_be(x0, out + (0 * sizeof(T)));
+   store_be(x1, out + (1 * sizeof(T)));
+   store_be(x2, out + (2 * sizeof(T)));
+   store_be(x3, out + (3 * sizeof(T)));
+   store_be(x4, out + (4 * sizeof(T)));
+   store_be(x5, out + (5 * sizeof(T)));
+   store_be(x6, out + (6 * sizeof(T)));
+   store_be(x7, out + (7 * sizeof(T)));
    }
 
 }
-- 
cgit v1.2.3