diff options
author | Matt Turner <[email protected]> | 2016-04-08 15:30:30 -0700 |
---|---|---|
committer | Matt Turner <[email protected]> | 2016-04-12 14:36:56 -0700 |
commit | 0a5d8d9af42fd77fce1492d55f958da97816961a (patch) | |
tree | 23d55ac4016c3eab7e3a6651068c7fb22ce921b0 | |
parent | a191e6b719848a17963f185954f1696fa5a2bcb1 (diff) |
i965/tiled_memcpy: Optimize RGBA -> BGRA swizzle.
Replaces four byte loads and four byte stores with a load, bswap,
rotate, store; or a movbe, rotate, store.
Reviewed-by: Roland Scheidegger <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 19 |
1 files changed, 11 insertions, 8 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 0a68751d5d0..fa5ec755298 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -56,24 +56,27 @@ static const uint32_t ytile_width = 128;
 static const uint32_t ytile_height = 32;
 static const uint32_t ytile_span = 16;
 
+static inline uint32_t
+ror(uint32_t n, uint32_t d)
+{
+   return (n >> d) | (n << (32 - d));
+}
+
 /**
  * Copy RGBA to BGRA - swap R and B.
  */
 static inline void *
 rgba8_copy(void *dst, const void *src, size_t bytes)
 {
-   uint8_t *d = dst;
-   uint8_t const *s = src;
+   uint32_t *d = dst;
+   uint32_t const *s = src;
 
    assert(bytes % 4 == 0);
 
    while (bytes >= 4) {
-      d[0] = s[2];
-      d[1] = s[1];
-      d[2] = s[0];
-      d[3] = s[3];
-      d += 4;
-      s += 4;
+      *d = ror(__builtin_bswap32(*s), 8);
+      d += 1;
+      s += 1;
       bytes -= 4;
    }
    return dst;
```