From 99d790538de2e7d7d489a8638b13c5aa069c27c3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 13 Oct 2016 12:37:59 -0700 Subject: vc4: Avoid loading from the texture during non-utile-aligned glTexImage(). Previously, the plan was "if the width/height we have to load/store isn't the size the user is planning on writing, then we need to load the old contents out beforehand to prevent writing back undefined contents". However, when we're doing glTexImage() we often end up aligning the width/height into the padding of the texture, and we don't actually need to read out that padding. Improves x11perf -aatrapezoid100 performance from ~460/sec to ~700/sec. --- src/gallium/drivers/vc4/vc4_resource.c | 46 +++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 4168079cd86..704cd71ea4b 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -284,26 +284,48 @@ vc4_resource_transfer_map(struct pipe_context *pctx, return NULL; /* We need to align the box to utile boundaries, since that's - * what load/store operate on. + * what load/store operates on. This may cause us to need to + * read out the original contents in that border area. Right + * now we just read out the entire contents, including the + * middle area that will just get overwritten. 
*/ - uint32_t orig_width = ptrans->box.width; - uint32_t orig_height = ptrans->box.height; uint32_t box_start_x = ptrans->box.x & (utile_w - 1); uint32_t box_start_y = ptrans->box.y & (utile_h - 1); - ptrans->box.width += box_start_x; - ptrans->box.x -= box_start_x; - ptrans->box.height += box_start_y; - ptrans->box.y -= box_start_y; - ptrans->box.width = align(ptrans->box.width, utile_w); - ptrans->box.height = align(ptrans->box.height, utile_h); + bool needs_load = (usage & PIPE_TRANSFER_READ) != 0; + + if (box_start_x) { + ptrans->box.width += box_start_x; + ptrans->box.x -= box_start_x; + needs_load = true; + } + if (box_start_y) { + ptrans->box.height += box_start_y; + ptrans->box.y -= box_start_y; + needs_load = true; + } + if (ptrans->box.width & (utile_w - 1)) { + /* We only need to force a load if our border region + * we're extending into is actually part of the + * texture. + */ + uint32_t slice_width = u_minify(prsc->width0, level); + if (ptrans->box.x + ptrans->box.width != slice_width) + needs_load = true; + ptrans->box.width = align(ptrans->box.width, utile_w); + } + if (ptrans->box.height & (utile_h - 1)) { + uint32_t slice_height = u_minify(prsc->height0, level); + if (ptrans->box.y + ptrans->box.height != slice_height) + needs_load = true; + ptrans->box.height = align(ptrans->box.height, utile_h); + } ptrans->stride = ptrans->box.width * rsc->cpp; ptrans->layer_stride = ptrans->stride * ptrans->box.height; trans->map = malloc(ptrans->layer_stride * ptrans->box.depth); - if (usage & PIPE_TRANSFER_READ || - ptrans->box.width != orig_width || - ptrans->box.height != orig_height) { + + if (needs_load) { vc4_load_tiled_image(trans->map, ptrans->stride, buf + slice->offset + ptrans->box.z * rsc->cube_map_stride, -- cgit v1.2.3