From 3ba57bae47666ada1145259755fc326b1b9f9463 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 28 Dec 2014 08:14:19 -1000 Subject: vc4: Only render tiles where the scissor ever intersected them. This gives a 2.7x improvement in x11perf -rect100, since we only end up load/storing the x11perf window, not the whole screen. --- src/gallium/drivers/vc4/vc4_context.c | 37 +++++++++++++++++++++++++++-------- src/gallium/drivers/vc4/vc4_context.h | 10 ++++++++++ src/gallium/drivers/vc4/vc4_draw.c | 4 ++++ src/gallium/drivers/vc4/vc4_emit.c | 11 +++++++++-- 4 files changed, 52 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 401eb216f27..e4e433020fc 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -94,8 +94,15 @@ vc4_setup_rcl(struct vc4_context *vc4) uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared; uint32_t width = vc4->framebuffer.width; uint32_t height = vc4->framebuffer.height; - uint32_t xtiles = align(width, 64) / 64; - uint32_t ytiles = align(height, 64) / 64; + uint32_t stride_in_tiles = align(width, 64) / 64; + + assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0); + uint32_t min_x_tile = vc4->draw_min_x / 64; + uint32_t min_y_tile = vc4->draw_min_y / 64; + uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64; + uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64; + uint32_t xtiles = max_x_tile - min_x_tile + 1; + uint32_t ytiles = max_y_tile - min_y_tile + 1; #if 0 fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n", @@ -171,10 +178,10 @@ vc4_setup_rcl(struct vc4_context *vc4) uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0; uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc); - for (int y = 0; y < ytiles; y++) { - for (int x = 0; x < xtiles; x++) { - bool end_of_frame = (x == xtiles - 1 && - y == ytiles - 1); + for (int y = min_y_tile; y <= max_y_tile; y++) { + for (int x = min_x_tile; x <= max_x_tile; x++) { + bool end_of_frame = (x == max_x_tile && + y == max_y_tile); bool coords_emitted = false; /* Note that the load doesn't actually occur until the @@ -225,13 +232,13 @@ vc4_setup_rcl(struct vc4_context *vc4) /* Wait for the binner before jumping to the first * tile's lists. */ - if (x == 0 && y == 0) + if (x == min_x_tile && y == min_y_tile) cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE); cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST); cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex, - (y * xtiles + x) * 32); + (y * stride_in_tiles + x) * 32); if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { vc4_tile_coordinates(vc4, x, y, &coords_emitted); @@ -313,6 +320,11 @@ vc4_draw_reset(struct vc4_context *vc4) vc4->dirty = ~0; vc4->resolve = 0; vc4->cleared = 0; + + vc4->draw_min_x = ~0; + vc4->draw_min_y = ~0; + vc4->draw_max_x = 0; + vc4->draw_max_y = 0; } void @@ -323,6 +335,15 @@ vc4_flush(struct pipe_context *pctx) if (!vc4->needs_flush) return; + /* The RCL setup would choke if the draw bounds cause no drawing, so + * just drop the drawing if that's the case. + */ + if (vc4->draw_max_x <= vc4->draw_min_x || + vc4->draw_max_y <= vc4->draw_min_y) { + vc4_draw_reset(vc4); + return; + } + /* Increment the semaphore indicating that binning is done and * unblocking the render thread. Note that this doesn't act until the * FLUSH completes. diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 962abbfa972..7e18a75e5b6 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -173,6 +173,16 @@ struct vc4_context { struct vc4_cl bo_handles; struct vc4_cl bo_pointers; uint32_t shader_rec_count; + /** @{ + * Bounding box of the scissor across all queued drawing. + * + * Note that the max values are exclusive. + */ + uint32_t draw_min_x; + uint32_t draw_min_y; + uint32_t draw_max_x; + uint32_t draw_max_y; + /** @} */ struct vc4_bo *tile_alloc; struct vc4_bo *tile_state; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index d99faa41c54..0d915040c88 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -316,6 +316,10 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers, if (buffers & PIPE_CLEAR_STENCIL) vc4->clear_stencil = stencil; + vc4->draw_min_x = 0; + vc4->draw_min_y = 0; + vc4->draw_max_x = vc4->framebuffer.width; + vc4->draw_max_y = vc4->framebuffer.height; vc4->cleared |= buffers; vc4->resolve |= buffers; diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c index 68564410afc..d2b54fccf91 100644 --- a/src/gallium/drivers/vc4/vc4_emit.c +++ b/src/gallium/drivers/vc4/vc4_emit.c @@ -37,12 +37,19 @@ vc4_emit_state(struct pipe_context *pctx) float vp_maxy = fabs(vpscale[1]) + vptranslate[1]; uint32_t minx = MAX2(vc4->scissor.minx, vp_minx); uint32_t miny = MAX2(vc4->scissor.miny, vp_miny); + uint32_t maxx = MIN2(vc4->scissor.maxx, vp_maxx); + uint32_t maxy = MIN2(vc4->scissor.maxy, vp_maxy); cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW); cl_u16(&vc4->bcl, minx); cl_u16(&vc4->bcl, miny); - cl_u16(&vc4->bcl, MIN2(vc4->scissor.maxx, vp_maxx) - minx); - cl_u16(&vc4->bcl, MIN2(vc4->scissor.maxy, vp_maxy) - miny); + cl_u16(&vc4->bcl, maxx - minx); + cl_u16(&vc4->bcl, maxy - miny); + + vc4->draw_min_x = MIN2(vc4->draw_min_x, minx); + vc4->draw_min_y = MIN2(vc4->draw_min_y, miny); + vc4->draw_max_x = MAX2(vc4->draw_max_x, maxx); + vc4->draw_max_y = MAX2(vc4->draw_max_y, maxy); } if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) { -- cgit v1.2.3