aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2016-10-13 12:37:59 -0700
committer: Eric Anholt <[email protected]> 2016-10-13 14:27:30 -0700
commit: 99d790538de2e7d7d489a8638b13c5aa069c27c3 (patch)
tree: 69782db7d286337514bfa40abcff983090fbab14 /src/gallium/drivers
parent: 0717cd975d85eb04389d84c54b796863aa537cf0 (diff)
vc4: Avoid loading from the texture during non-utile-aligned glTexImage().
Previously, the plan was "if the width/height we have to load/store isn't the size the user is planning on writing, then we need to load the old contents out beforehand to prevent writing back undefined data". However, when we're doing glTexImage() we often end up aligning the width/height into the padding of the texture, and we don't actually need to read out that padding. Improves x11perf -aatrapezoid100 performance from ~460/sec to ~700/sec.
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/vc4/vc4_resource.c 46
1 files changed, 34 insertions, 12 deletions
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 4168079cd86..704cd71ea4b 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -284,26 +284,48 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
return NULL;
/* We need to align the box to utile boundaries, since that's
- * what load/store operate on.
+ * what load/store operates on. This may cause us to need to
+ * read out the original contents in that border area. Right
+ * now we just read out the entire contents, including the
+ * middle area that will just get overwritten.
*/
- uint32_t orig_width = ptrans->box.width;
- uint32_t orig_height = ptrans->box.height;
uint32_t box_start_x = ptrans->box.x & (utile_w - 1);
uint32_t box_start_y = ptrans->box.y & (utile_h - 1);
- ptrans->box.width += box_start_x;
- ptrans->box.x -= box_start_x;
- ptrans->box.height += box_start_y;
- ptrans->box.y -= box_start_y;
- ptrans->box.width = align(ptrans->box.width, utile_w);
- ptrans->box.height = align(ptrans->box.height, utile_h);
+ bool needs_load = (usage & PIPE_TRANSFER_READ) != 0;
+
+ if (box_start_x) {
+ ptrans->box.width += box_start_x;
+ ptrans->box.x -= box_start_x;
+ needs_load = true;
+ }
+ if (box_start_y) {
+ ptrans->box.height += box_start_y;
+ ptrans->box.y -= box_start_y;
+ needs_load = true;
+ }
+ if (ptrans->box.width & (utile_w - 1)) {
+ /* We only need to force a load if our border region
+ * we're extending into is actually part of the
+ * texture.
+ */
+ uint32_t slice_width = u_minify(prsc->width0, level);
+ if (ptrans->box.x + ptrans->box.width != slice_width)
+ needs_load = true;
+ ptrans->box.width = align(ptrans->box.width, utile_w);
+ }
+ if (ptrans->box.height & (utile_h - 1)) {
+ uint32_t slice_height = u_minify(prsc->height0, level);
+ if (ptrans->box.y + ptrans->box.height != slice_height)
+ needs_load = true;
+ ptrans->box.height = align(ptrans->box.height, utile_h);
+ }
ptrans->stride = ptrans->box.width * rsc->cpp;
ptrans->layer_stride = ptrans->stride * ptrans->box.height;
trans->map = malloc(ptrans->layer_stride * ptrans->box.depth);
- if (usage & PIPE_TRANSFER_READ ||
- ptrans->box.width != orig_width ||
- ptrans->box.height != orig_height) {
+
+ if (needs_load) {
vc4_load_tiled_image(trans->map, ptrans->stride,
buf + slice->offset +
ptrans->box.z * rsc->cube_map_stride,