diff options
author | Marek Olšák <[email protected]> | 2015-08-29 02:32:13 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2015-09-01 21:51:14 +0200 |
commit | 0c2eed0edec877584c9362bd9cb9004ff10a8b91 (patch) | |
tree | 4a2d13635ef732a145d59ec738c90ae4cda3ded2 /src/gallium | |
parent | c2a42d1f9ff104c562822d7ab2cbaa361666266b (diff) |
radeonsi: avoid redundant CB and DB register updates
The main idea is to avoid repeatedly setting CB_COLORi_INFO = 0 for i > 0
when those colorbuffers aren't used. This is mainly for glamor.
The same applies to DB: Z_INFO and STENCIL_INFO need to be cleared only once.
Reviewed-by: Alex Deucher <[email protected]>
Acked-by: Christian König <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/r600_pipe_common.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/r600_texture.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_blit.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_hw_context.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 26 |
7 files changed, 36 insertions, 12 deletions
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 22a0950a491..08b2f644cad 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -395,7 +395,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers, if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) { evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom, - &buffers, color); + &buffers, NULL, color); if (!buffers) return; /* all buffers have been fast cleared */ } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 29db1cc4e07..d22c230ea3c 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -562,7 +562,7 @@ unsigned r600_translate_colorswap(enum pipe_format format); void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct pipe_framebuffer_state *fb, struct r600_atom *fb_state, - unsigned *buffers, + unsigned *buffers, unsigned *dirty_cbufs, const union pipe_color_union *color); void r600_init_screen_texture_functions(struct r600_common_screen *rscreen); void r600_init_context_texture_functions(struct r600_common_context *rctx); diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 54696910e43..89f18fb106f 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1217,7 +1217,7 @@ static void evergreen_set_clear_color(struct r600_texture *rtex, void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct pipe_framebuffer_state *fb, struct r600_atom *fb_state, - unsigned *buffers, + unsigned *buffers, unsigned *dirty_cbufs, const union pipe_color_union *color) { int i; @@ -1279,6 +1279,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, tex->cmask.offset, tex->cmask.size, 0, true); tex->dirty_level_mask |= 1 << 
fb->cbufs[i]->u.tex.level; + if (dirty_cbufs) + *dirty_cbufs |= 1 << i; rctx->set_atom_dirty(rctx, fb_state, true); *buffers &= ~clear_bit; } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index c28b2a80088..d1486bd822d 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -336,8 +336,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers, zsbuf ? (struct r600_texture*)zsbuf->texture : NULL; if (buffers & PIPE_CLEAR_COLOR) { - evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom, - &buffers, color); + evergreen_do_fast_color_clear(&sctx->b, fb, + &sctx->framebuffer.atom, &buffers, + &sctx->framebuffer.dirty_cbufs, + color); if (!buffers) return; /* all buffers have been fast cleared */ } @@ -374,6 +376,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers, } zstex->depth_clear_value = depth; + sctx->framebuffer.dirty_zsbuf = true; si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */ sctx->db_depth_clear = true; si_mark_atom_dirty(sctx, &sctx->db_render_state); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 2381b6c0004..561378196b5 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -184,8 +184,11 @@ void si_begin_new_cs(struct si_context *ctx) /* The CS initialization should be emitted before everything else. 
*/ si_pm4_emit(ctx, ctx->init_config); - si_mark_atom_dirty(ctx, &ctx->clip_regs); + ctx->framebuffer.dirty_cbufs = (1 << 8) - 1; + ctx->framebuffer.dirty_zsbuf = true; si_mark_atom_dirty(ctx, &ctx->framebuffer.atom); + + si_mark_atom_dirty(ctx, &ctx->clip_regs); si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs); si_mark_atom_dirty(ctx, &ctx->msaa_config); si_mark_atom_dirty(ctx, &ctx->db_render_state); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 47ad619ccdc..9be4aa7d5b5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -127,6 +127,8 @@ struct si_framebuffer { unsigned cb0_is_integer; unsigned compressed_cb_mask; unsigned export_16bpc; + unsigned dirty_cbufs; + bool dirty_zsbuf; }; struct si_scissors { diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 88964e1a545..3c250484e3a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2109,6 +2109,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, SI_CONTEXT_INV_TC_L2 | SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + /* Take the maximum of the old and new count. If the new count is lower, + * dirtying is needed to disable the unbound colorbuffers. + */ + sctx->framebuffer.dirty_cbufs |= + (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; + sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; + util_copy_framebuffer_state(&sctx->framebuffer.state, state); sctx->framebuffer.export_16bpc = 0; @@ -2219,6 +2226,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom /* Colorbuffers. 
*/ for (i = 0; i < nr_cbufs; i++) { + if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) + continue; + cb = (struct r600_surface*)state->cbufs[i]; if (!cb) { r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, @@ -2259,17 +2269,18 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */ } /* set CB_COLOR1_INFO for possible dual-src blending */ - if (i == 1 && state->cbufs[0]) { + if (i == 1 && state->cbufs[0] && + sctx->framebuffer.dirty_cbufs & (1 << 0)) { r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, cb->cb_color_info | tex->cb_color_info); i++; } - for (; i < 8 ; i++) { - r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); - } + for (; i < 8 ; i++) + if (sctx->framebuffer.dirty_cbufs & (1 << i)) + r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); /* ZS buffer. */ - if (state->zsbuf) { + if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { struct r600_surface *zb = (struct r600_surface*)state->zsbuf; struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; @@ -2304,7 +2315,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, zb->pa_su_poly_offset_db_fmt_cntl); - } else { + } else if (sctx->framebuffer.dirty_zsbuf) { r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ @@ -2314,6 +2325,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); + + 
sctx->framebuffer.dirty_cbufs = 0; + sctx->framebuffer.dirty_zsbuf = false; } static void si_emit_msaa_sample_locs(struct si_context *sctx, |