about summary refs log tree commit diff stats
path: root/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
diff options
context:
space:
mode:
authorMatt Turner <[email protected]>2016-04-11 11:47:21 -0700
committerMatt Turner <[email protected]>2016-04-12 14:36:59 -0700
commitfc88b4babf86e93421e7a9da29ae125712891390 (patch)
treea312b0d05e1be5a5e2b5c67ef227a367fe9bd329 /src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
parent0a5d8d9af42fd77fce1492d55f958da97816961a (diff)
i965/tiled_memcpy: Move SSSE3 code back into inline functions.
This will make adding SSE2 code a lot cleaner. Reviewed-by: Roland Scheidegger <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/intel_tiled_memcpy.c')
-rw-r--r--  src/mesa/drivers/dri/i965/intel_tiled_memcpy.c  42
1 files changed, 24 insertions, 18 deletions
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index fa5ec755298..5d585302288 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -85,6 +85,22 @@ rgba8_copy(void *dst, const void *src, size_t bytes)
#ifdef __SSSE3__
static const uint8_t rgba8_permutation[16] =
{ 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 };
+
+static inline void
+rgba8_copy_16_aligned_dst(void *dst, const void *src)
+{
+ _mm_store_si128(dst,
+ _mm_shuffle_epi8(_mm_loadu_si128(src),
+ *(__m128i *)rgba8_permutation));
+}
+
+static inline void
+rgba8_copy_16_aligned_src(void *dst, const void *src)
+{
+ _mm_storeu_si128(dst,
+ _mm_shuffle_epi8(_mm_load_si128(src),
+ *(__m128i *)rgba8_permutation));
+}
#endif
/**
@@ -93,23 +109,18 @@ static const uint8_t rgba8_permutation[16] =
static inline void *
rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
{
- uint8_t *d = dst;
- uint8_t const *s = src;
-
assert(bytes == 0 || !(((uintptr_t)dst) & 0xf));
#ifdef __SSSE3__
while (bytes >= 16) {
- _mm_store_si128((__m128i *)d,
- _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)s),
- *(__m128i *) rgba8_permutation));
- s += 16;
- d += 16;
+ rgba8_copy_16_aligned_dst(dst, src);
+ src += 16;
+ dst += 16;
bytes -= 16;
}
#endif
- rgba8_copy(d, s, bytes);
+ rgba8_copy(dst, src, bytes);
return dst;
}
@@ -120,23 +131,18 @@ rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
static inline void *
rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
{
- uint8_t *d = dst;
- uint8_t const *s = src;
-
assert(bytes == 0 || !(((uintptr_t)src) & 0xf));
#ifdef __SSSE3__
while (bytes >= 16) {
- _mm_storeu_si128((__m128i *)d,
- _mm_shuffle_epi8(_mm_load_si128((__m128i *)s),
- *(__m128i *) rgba8_permutation));
- s += 16;
- d += 16;
+ rgba8_copy_16_aligned_src(dst, src);
+ src += 16;
+ dst += 16;
bytes -= 16;
}
#endif
- rgba8_copy(d, s, bytes);
+ rgba8_copy(dst, src, bytes);
return dst;
}