diff options
Diffstat (limited to 'src/gallium/drivers/panfrost/pan_swizzle.c')
-rw-r--r-- | src/gallium/drivers/panfrost/pan_swizzle.c | 234 |
1 files changed, 234 insertions, 0 deletions
diff --git a/src/gallium/drivers/panfrost/pan_swizzle.c b/src/gallium/drivers/panfrost/pan_swizzle.c new file mode 100644 index 00000000000..c021843846d --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_swizzle.c @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2012-2013 Luc Verhaegen <[email protected]> + * Copyright (c) 2018 Alyssa Rosenzweig <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <stdio.h> +#include "pan_swizzle.h" +#include "pan_allocate.h" + +/* Space a group of 4-bits out. For instance, 0x7 -- that is, 0b111 -- would + * become 0b10101 */ + +static inline int +space_bits_4(int i) +{ + return ((i & 0x8) << 3) | + ((i & 0x4) << 2) | + ((i & 0x2) << 1) | + ((i & 0x1) << 0); +} + +/* Generate lookup table for the space filler curve. Note this is a 1:1 + * mapping, just with bits twiddled around. */ + +uint32_t space_filler[16][16]; +uint32_t space_filler_packed4[16][4]; + +void +panfrost_generate_space_filler_indices() +{ + for (int y = 0; y < 16; ++y) { + for (int x = 0; x < 16; ++x) { + space_filler[y][x] = + space_bits_4(y ^ x) | (space_bits_4(y) << 1); + } + + for (int q = 0; q < 4; ++q) { + space_filler_packed4[y][q] = + (space_filler[y][(q * 4) + 0] << 0) | + (space_filler[y][(q * 4) + 1] << 8) | + (space_filler[y][(q * 4) + 2] << 16) | + (space_filler[y][(q * 4) + 3] << 24); + } + } +} + +static void +swizzle_bpp1_align16(int width, int height, int source_stride, int block_pitch, + const uint8_t *pixels, + uint8_t *ldest) +{ + for (int y = 0; y < height; ++y) { + { + int block_y = y & ~(0x0f); + int rem_y = y & 0x0f; + uint8_t *block_start_s = ldest + (block_y * block_pitch); + const uint8_t *source_start = pixels + (y * source_stride); + const uint8_t *source_end = source_start + width; + + /* Operate on blocks of 16 pixels to minimise bookkeeping */ + + for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) { + const uint32_t *src_32 = (const uint32_t *) source_start; + + for (int q = 0; q < 4; ++q) { + uint32_t src = src_32[q]; + uint32_t spaced = space_filler_packed4[rem_y][q]; + uint16_t *bs = (uint16_t *) block_start_s; + + int spacedA = (spaced >> 0) & 0xFF; + int spacedB = (spaced >> 16) & 0xFF; + + bs[spacedA >> 1] = (src >> 0) & 0xFFFF; + bs[spacedB >> 1] = (src >> 16) & 0xFFFF; + } + } + } + + ++y; + + { + int block_y = y & ~(0x0f); + int rem_y = y & 0x0f; + uint8_t *block_start_s = ldest + (block_y * block_pitch); + const uint8_t *source_start = pixels + (y * source_stride); + const uint8_t *source_end = source_start + width; + + /* Operate on blocks of 16 pixels to minimise bookkeeping */ + + for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) { + const uint32_t *src_32 = (const uint32_t *) source_start; + + for (int q = 0; q < 4; ++q) { + uint32_t src = src_32[q]; + uint32_t spaced = space_filler_packed4[rem_y][q]; + + block_start_s[(spaced >> 0) & 0xFF] = (src >> 0) & 0xFF; + block_start_s[(spaced >> 8) & 0xFF] = (src >> 8) & 0xFF; + + block_start_s[(spaced >> 16) & 0xFF] = (src >> 16) & 0xFF; + block_start_s[(spaced >> 24) & 0xFF] = (src >> 24) & 0xFF; + } + } + } + + } +} + +static void +swizzle_bpp4_align16(int width, int height, int source_stride, int block_pitch, + const uint32_t *pixels, + uint32_t *ldest) +{ + for (int y = 0; y < height; ++y) { + int block_y = y & ~(0x0f); + int rem_y = y & 0x0f; + uint32_t *block_start_s = ldest + (block_y * block_pitch); + const uint32_t *source_start = pixels + (y * source_stride); + const uint32_t *source_end = source_start + width; + + /* Operate on blocks of 16 pixels to minimise bookkeeping */ + + for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) { + for (int j = 0; j < 16; ++j) + block_start_s[space_filler[rem_y][j]] = source_start[j]; + } + } +} + +void +panfrost_texture_swizzle(int width, int height, int bytes_per_pixel, int source_stride, + const uint8_t *pixels, + uint8_t *ldest) +{ + /* Calculate maximum size, overestimating a bit */ + int block_pitch = ALIGN(width, 16) >> 4; + + /* Use fast path if available */ + if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) { + swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest); + return; + } else if (bytes_per_pixel == 1 /* && (ALIGN(width, 16) == width) */) { + swizzle_bpp1_align16(width, height, source_stride, (block_pitch * 256 >> 4), pixels, (uint8_t *) ldest); + return; + } + + /* Otherwise, default back on generic path */ + + for (int y = 0; y < height; ++y) { + int block_y = y >> 4; + int rem_y = y & 0x0F; + int block_start_s = block_y * block_pitch * 256; + int source_start = y * source_stride; + + for (int x = 0; x < width; ++x) { + int block_x_s = (x >> 4) * 256; + int rem_x = x & 0x0F; + + int index = space_filler[rem_y][rem_x]; + const uint8_t *source = &pixels[source_start + bytes_per_pixel * x]; + uint8_t *dest = ldest + bytes_per_pixel * (block_start_s + block_x_s + index); + + for (int b = 0; b < bytes_per_pixel; ++b) + dest[b] = source[b]; + } + } +} + + +unsigned +panfrost_swizzled_size(int width, int height, int bytes_per_pixel) +{ + /* Calculate maximum size, overestimating a bit */ + int block_pitch = ALIGN(width, 16) >> 4; + unsigned sz = bytes_per_pixel * 256 * ((height >> 4) + 1) * block_pitch; + + return sz; +} + +#if 0 +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#define TW 1920 +#define TH 1080 +void +main() +{ + panfrost_generate_space_filler_indices(); + + uint8_t in[TW * TH * 4]; + + for (int i = 0; i < TW * TH * 4; ++i) in[i] = i; + + uint8_t *out = malloc(TW * TH * 4 * 2); + + for (int i = 0; i < 60; ++i) { + //swizzle_bpp4_align16(TW, TH, TW*4, TW>>4, (uint32_t *) in, (uint32_t *) out); + //panfrost_texture_swizzle_bpp4(TW, TH, TW*4, (uint32_t *) in, (uint32_t *) out); + //panfrost_texture_swizzle(TW, TH, 4, TW*4, (uint32_t *) in, (uint32_t *) out); + + int block_pitch = ALIGN(TW, 16) >> 4; + swizzle_bpp1_align16(TW, TH, TW, (block_pitch * 256 >> 4), in, (uint8_t *) out); + } + +#if 0 + uint8_t *reference = malloc(TW * TH * 4 * 2); + panfrost_texture_swizzle(TW, TH, 1, TW, (uint8_t *) in, (uint8_t *) reference); + + if (memcmp(reference, out, TW * TH * 4)) printf("XXX\n"); + +#endif + printf("ref %X\n", out[0]); +} +#endif |