aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Matt Turner <[email protected]> 2016-04-08 15:30:30 -0700
committer: Matt Turner <[email protected]> 2016-04-12 14:36:56 -0700
commit: 0a5d8d9af42fd77fce1492d55f958da97816961a (patch)
tree: 23d55ac4016c3eab7e3a6651068c7fb22ce921b0
parent: a191e6b719848a17963f185954f1696fa5a2bcb1 (diff)
i965/tiled_memcpy: Optimize RGBA -> BGRA swizzle.
Replaces four byte loads and four byte stores with a load, bswap, rotate,
store; or a movbe, rotate, store.

Reviewed-by: Roland Scheidegger <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 19
1 files changed, 11 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 0a68751d5d0..fa5ec755298 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -56,24 +56,27 @@ static const uint32_t ytile_width = 128;
static const uint32_t ytile_height = 32;
static const uint32_t ytile_span = 16;
+/**
+ * Rotate the 32-bit value \p n right by \p d bit positions.
+ *
+ * The rotate count is reduced modulo 32, so d == 0 and d >= 32 are
+ * well-defined: shifting a 32-bit value by 32 or more is undefined
+ * behaviour in C, which the unmasked form (n << (32 - d)) hits for d == 0.
+ *
+ * \return n with its low (d mod 32) bits moved to the top.
+ */
+static inline uint32_t
+ror(uint32_t n, uint32_t d)
+{
+   d &= 31;
+   /* (32 - d) & 31 maps d == 0 to a left shift of 0 instead of 32. */
+   return (n >> d) | (n << ((32 - d) & 31));
+}
+
/**
* Copy RGBA to BGRA - swap R and B.
*/
static inline void *
rgba8_copy(void *dst, const void *src, size_t bytes)
{
- uint8_t *d = dst;
- uint8_t const *s = src;
+ uint32_t *d = dst;
+ uint32_t const *s = src;
assert(bytes % 4 == 0);
while (bytes >= 4) {
- d[0] = s[2];
- d[1] = s[1];
- d[2] = s[0];
- d[3] = s[3];
- d += 4;
- s += 4;
+ *d = ror(__builtin_bswap32(*s), 8);
+ d += 1;
+ s += 1;
bytes -= 4;
}
return dst;