summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2017-01-04 12:40:37 -0800
committer: Eric Anholt <[email protected]> 2017-01-05 17:19:48 -0800
commit: e64b1169d37599a9ee1c5877aa457a41c5a8d726 (patch)
tree: 2855721b26f67677303755f8a627e6874421b4cb
parent: 7b8c67b3cc60066ea55d8178a70ae035dac3dd81 (diff)
vc4: Simplify the load/store utile functions.
They now have less of a dependency on the cpp, and don't have to do a divide. Hacking up mesa-demos teximage to do only one subtest and not draw points, I saw 1024x1024 glTexSubImage2D() improve by 4.86939% +/- 1.40408% (n=30) and glGetTexImage() by 2.18978% +/- 0.140268% (n=5).
-rw-r--r-- src/gallium/drivers/vc4/vc4_tiling.c | 32
1 files changed, 22 insertions, 10 deletions
diff --git a/src/gallium/drivers/vc4/vc4_tiling.c b/src/gallium/drivers/vc4/vc4_tiling.c
index 4bcb85b16f5..390ebe555c2 100644
--- a/src/gallium/drivers/vc4/vc4_tiling.c
+++ b/src/gallium/drivers/vc4/vc4_tiling.c
@@ -87,6 +87,22 @@ vc4_utile_height(int cpp)
}
}
+/** Returns the stride in bytes of a 64-byte microtile. */
+static uint32_t
+vc4_utile_stride(int cpp)
+{
+ switch (cpp) {
+ case 1:
+ return 8;
+ case 2:
+ case 4:
+ case 8:
+ return 16;
+ default:
+ unreachable("bad cpp");
+ }
+}
+
/**
* The texture unit decides what tiling format a particular miplevel is using
* this function, so we lay out our miptrees accordingly.
@@ -101,25 +117,21 @@ vc4_size_is_lt(uint32_t width, uint32_t height, int cpp)
void
vc4_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp)
{
- uint32_t utile_h = vc4_utile_height(cpp);
- uint32_t row_size = 64 / utile_h;
+ uint32_t src_stride = vc4_utile_stride(cpp);
- for (int y = 0; y < utile_h; y++) {
- memcpy(dst, src, row_size);
+ for (uint32_t src_offset = 0; src_offset < 64; src_offset += src_stride) {
+ memcpy(dst, src + src_offset, src_stride);
dst += dst_stride;
- src += row_size;
}
}
void
vc4_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp)
{
- uint32_t utile_h = vc4_utile_height(cpp);
- uint32_t row_size = 64 / utile_h;
+ uint32_t dst_stride = vc4_utile_stride(cpp);
- for (int y = 0; y < utile_h; y++) {
- memcpy(dst, src, row_size);
- dst += row_size;
+ for (uint32_t dst_offset = 0; dst_offset < 64; dst_offset += dst_stride) {
+ memcpy(dst + dst_offset, src, dst_stride);
src += src_stride;
}
}