summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2018-08-07 17:53:24 -0700
committer: Eric Anholt <eric@anholt.net> 2018-08-08 15:53:25 -0700
commit: 3e06b918aab3c6a4ca30f5e935aa6996f7009d56 (patch)
tree: 6a24cfffee8ecaf22d2e446472d591310d316c92 /src
parent: d6a174669f1452e32ffd24e31cd4b1d4a6e09295 (diff)
vc4: Compile the LT image helper per cpp we might load/store.
For the partial load/store support I'm about to add, we want the memcpy to be compiled out to a single load/store. This should also eliminate the calls to vc4_utile_width/height(). Improves x11perf -putimage100 performance by 3.76344% +/- 1.16978% (n=15).
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/vc4/vc4_tiling_lt.c 33
1 file changed, 31 insertions, 2 deletions
diff --git a/src/gallium/drivers/vc4/vc4_tiling_lt.c b/src/gallium/drivers/vc4/vc4_tiling_lt.c
index b8f4c0405c2..8c875e7bd3a 100644
--- a/src/gallium/drivers/vc4/vc4_tiling_lt.c
+++ b/src/gallium/drivers/vc4/vc4_tiling_lt.c
@@ -289,12 +289,40 @@ vc4_lt_image_helper(void *gpu, uint32_t gpu_stride,
}
}
+static inline void /* cpp dispatcher around vc4_lt_image_helper(); every other argument is forwarded unchanged */
+vc4_lt_image_cpp_helper(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ int cpp, const struct pipe_box *box, bool to_cpu)
+{
+ switch (cpp) { /* each case passes cpp as a literal rather than the runtime variable; per the commit message this is meant to get the helper compiled once per cpp */
+ case 1:
+ vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 1, box,
+ to_cpu);
+ break;
+ case 2:
+ vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 2, box,
+ to_cpu);
+ break;
+ case 4:
+ vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 4, box,
+ to_cpu);
+ break;
+ case 8:
+ vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 8, box,
+ to_cpu);
+ break;
+ default:
+ unreachable("bad cpp"); /* only cpp values 1, 2, 4 and 8 are dispatched; anything else is declared unreachable */
+ }
+}
+
void
NEON_TAG(vc4_load_lt_image)(void *dst, uint32_t dst_stride, /* load entry point: src/src_stride are handed on as the helper's gpu arguments, dst/dst_stride as its cpu arguments */
void *src, uint32_t src_stride,
int cpp, const struct pipe_box *box)
{
- vc4_lt_image_helper(src, src_stride, dst, dst_stride, cpp, box, true);
+ vc4_lt_image_cpp_helper(src, src_stride, dst, dst_stride, cpp, box, /* replaces the direct helper call with the cpp dispatcher */
+ true); /* to_cpu = true */
}
void
@@ -302,5 +330,6 @@ NEON_TAG(vc4_store_lt_image)(void *dst, uint32_t dst_stride,
void *src, uint32_t src_stride,
int cpp, const struct pipe_box *box)
{
- vc4_lt_image_helper(dst, dst_stride, src, src_stride, cpp, box, false);
+ vc4_lt_image_cpp_helper(dst, dst_stride, src, src_stride, cpp, box,
+ false);
}