diff options
author | Marek Olšák <[email protected]> | 2018-10-06 22:44:36 -0400 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2018-10-16 17:23:25 -0400 |
commit | fcc70e4855c3bde3cadce9d0d1abf8da7106f643 (patch) | |
tree | 6c0ccaa5d5dcb71e083930cf5376d2ff57fda8c9 /src | |
parent | 25ddb15cfe36ff0d1f47e7b34161b545e6f329a8 (diff) |
radeonsi: track context rolls better for the Vega scissor bug workaround
We should get fewer context rolls with the SET_CONTEXT_REG optimization,
but that benefit would have been for nothing if the scissor state rolled
the context anyway. Don't emit the scissor state if there is no context roll.
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 31 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 17 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_binning.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 32 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 23 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_viewport.c | 3 |
7 files changed, 80 insertions, 34 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 7ae17435ab6..6edc06cece7 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1023,6 +1023,7 @@ struct si_context { unsigned num_resident_handles; uint64_t num_alloc_tex_transfer_bytes; unsigned last_tex_ps_draw_ratio; /* for query */ + unsigned context_roll_counter; /* Queries. */ /* Maintain the list of active queries for pausing between IBs. */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b63e70092db..8b2e6e57f45 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -88,9 +88,6 @@ static void si_emit_cb_render_state(struct si_context *sctx) (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) cb_target_mask = 0; - radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, - SI_TRACKED_CB_TARGET_MASK, cb_target_mask); - /* GFX9: Flush DFSM when CB_TARGET_MASK changes. * I think we don't have to do anything between IBs. */ @@ -102,6 +99,10 @@ static void si_emit_cb_render_state(struct si_context *sctx) radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); } + unsigned initial_cdw = cs->current.cdw; + radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, + SI_TRACKED_CB_TARGET_MASK, cb_target_mask); + if (sctx->chip_class >= VI) { /* DCC MSAA workaround for blending. 
* Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_- @@ -252,6 +253,8 @@ static void si_emit_cb_render_state(struct si_context *sctx) sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control); } + if (initial_cdw != cs->current.cdw) + sctx->context_roll_counter++; } /* @@ -773,6 +776,7 @@ static void si_emit_clip_regs(struct si_context *sctx) clipdist_mask &= rs->clip_plane_enable; culldist_mask |= clipdist_mask; + unsigned initial_cdw = sctx->gfx_cs->current.cdw; radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL, vs_sel->pa_cl_vs_out_cntl | @@ -784,6 +788,9 @@ static void si_emit_clip_regs(struct si_context *sctx) rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space)); + + if (initial_cdw != sctx->gfx_cs->current.cdw) + sctx->context_roll_counter++; } /* @@ -1352,6 +1359,7 @@ static void si_emit_db_render_state(struct si_context *sctx) { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned db_shader_control, db_render_control, db_count_control; + unsigned initial_cdw = sctx->gfx_cs->current.cdw; /* DB_RENDER_CONTROL */ if (sctx->dbcb_depth_copy_enabled || @@ -1434,6 +1442,9 @@ static void si_emit_db_render_state(struct si_context *sctx) radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL, db_shader_control); + + if (initial_cdw != sctx->gfx_cs->current.cdw) + sctx->context_roll_counter++; } /* @@ -3489,6 +3500,8 @@ static void si_emit_msaa_config(struct si_context *sctx) } } + unsigned initial_cdw = cs->current.cdw; + /* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */ radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL, sc_line_cntl, @@ -3500,10 +3513,14 @@ static void si_emit_msaa_config(struct si_context *sctx) radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1); - /* GFX9: Flush DFSM when the AA mode changes. 
*/ - if (sctx->screen->dfsm_allowed) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); + if (initial_cdw != cs->current.cdw) { + sctx->context_roll_counter++; + + /* GFX9: Flush DFSM when the AA mode changes. */ + if (sctx->screen->dfsm_allowed) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); + } } } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f52296d1119..83589e6918c 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -171,17 +171,13 @@ union si_state { #define SI_STATE_BIT(name) (1 << SI_STATE_IDX(name)) #define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *)) -static inline unsigned si_states_that_roll_context(void) +static inline unsigned si_states_that_always_roll_context(void) { return (SI_STATE_BIT(blend) | SI_STATE_BIT(rasterizer) | SI_STATE_BIT(dsa) | SI_STATE_BIT(poly_offset) | - SI_STATE_BIT(es) | - SI_STATE_BIT(gs) | - SI_STATE_BIT(vgt_shader_config) | - SI_STATE_BIT(vs) | - SI_STATE_BIT(ps)); + SI_STATE_BIT(vgt_shader_config)); } union si_state_atoms { @@ -216,25 +212,18 @@ union si_state_atoms { sizeof(struct si_atom))) #define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct si_atom*)) -static inline unsigned si_atoms_that_roll_context(void) +static inline unsigned si_atoms_that_always_roll_context(void) { return (SI_ATOM_BIT(streamout_begin) | SI_ATOM_BIT(streamout_enable) | SI_ATOM_BIT(framebuffer) | SI_ATOM_BIT(msaa_sample_locs) | - SI_ATOM_BIT(db_render_state) | - SI_ATOM_BIT(dpbb_state) | - SI_ATOM_BIT(msaa_config) | SI_ATOM_BIT(sample_mask) | - SI_ATOM_BIT(cb_render_state) | SI_ATOM_BIT(blend_color) | - SI_ATOM_BIT(clip_regs) | SI_ATOM_BIT(clip_state) | - SI_ATOM_BIT(guardband) | SI_ATOM_BIT(scissors) | SI_ATOM_BIT(viewports) | SI_ATOM_BIT(stencil_ref) | - 
SI_ATOM_BIT(spi_map) | SI_ATOM_BIT(scratch_state)); } diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c index 4aad94d95f9..70c129242d1 100644 --- a/src/gallium/drivers/radeonsi/si_state_binning.c +++ b/src/gallium/drivers/radeonsi/si_state_binning.c @@ -310,6 +310,8 @@ static struct uvec2 si_get_depth_bin_size(struct si_context *sctx) static void si_emit_dpbb_disable(struct si_context *sctx) { + unsigned initial_cdw = sctx->gfx_cs->current.cdw; + radeon_opt_set_context_reg(sctx, R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0, S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) | @@ -318,6 +320,8 @@ static void si_emit_dpbb_disable(struct si_context *sctx) SI_TRACKED_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); + if (initial_cdw != sctx->gfx_cs->current.cdw) + sctx->context_roll_counter++; } void si_emit_dpbb_state(struct si_context *sctx) @@ -419,6 +423,7 @@ void si_emit_dpbb_state(struct si_context *sctx) if (bin_size.y >= 32) bin_size_extend.y = util_logbase2(bin_size.y) - 5; + unsigned initial_cdw = sctx->gfx_cs->current.cdw; radeon_opt_set_context_reg( sctx, R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0, @@ -436,4 +441,6 @@ void si_emit_dpbb_state(struct si_context *sctx) SI_TRACKED_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(punchout_mode) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); + if (initial_cdw != sctx->gfx_cs->current.cdw) + sctx->context_roll_counter++; } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 69f723e4e4a..83eb646b791 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1189,26 +1189,26 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i unsigned skip_atom_mask) { unsigned num_patches = 0; + /* Vega10/Raven scissor bug workaround. 
When any context register is + * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR + * registers must be written too. + */ + bool handle_scissor_bug = (sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) && + !si_is_atom_dirty(sctx, &sctx->atoms.s.scissors); bool context_roll = false; /* set correctly for GFX9 only */ context_roll |= si_emit_rasterizer_prim_state(sctx); if (sctx->tes_shader.cso) context_roll |= si_emit_derived_tess_state(sctx, info, &num_patches); - if (info->count_from_stream_output) + + if (handle_scissor_bug && + (info->count_from_stream_output || + sctx->dirty_atoms & si_atoms_that_always_roll_context() || + sctx->dirty_states & si_states_that_always_roll_context() || + si_prim_restart_index_changed(sctx, info))) context_roll = true; - /* Vega10/Raven scissor bug workaround. When any context register is - * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR - * registers must be written too. - */ - if ((sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) && - (context_roll || - sctx->dirty_atoms & si_atoms_that_roll_context() || - sctx->dirty_states & si_states_that_roll_context() || - si_prim_restart_index_changed(sctx, info))) { - sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; - si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors); - } + sctx->context_roll_counter = 0; /* Emit state atoms. */ unsigned mask = sctx->dirty_atoms & ~skip_atom_mask; @@ -1231,6 +1231,12 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i } sctx->dirty_states = 0; + if (handle_scissor_bug && + (context_roll || sctx->context_roll_counter)) { + sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; + sctx->atoms.s.scissors.emit(sctx); + } + /* Emit draw states. 
*/ si_emit_vs_state(sctx, info); si_emit_draw_registers(sctx, info, num_patches); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 2bdac33586b..ad7d21e7816 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -561,6 +561,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) static void si_emit_shader_es(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.es->shader; + unsigned initial_cdw = sctx->gfx_cs->current.cdw; if (!shader) return; @@ -578,6 +579,9 @@ static void si_emit_shader_es(struct si_context *sctx) radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, shader->vgt_vertex_reuse_block_cntl); + + if (initial_cdw != sctx->gfx_cs->current.cdw) + sctx->context_roll_counter++; } static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) @@ -762,6 +766,8 @@ static void gfx9_get_gs_info(struct si_shader_selector *es, static void si_emit_shader_gs(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.gs->shader; + unsigned initial_cdw = sctx->gfx_cs->current.cdw; + if (!shader) return; @@ -822,6 +828,9 @@ static void si_emit_shader_gs(struct si_context *sctx) SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, shader->vgt_vertex_reuse_block_cntl); } + + if (initial_cdw != sctx->gfx_cs->current.cdw) + sctx->context_roll_counter++; } static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) @@ -957,6 +966,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) static void si_emit_shader_vs(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.vs->shader; + unsigned initial_cdw = sctx->gfx_cs->current.cdw; + if (!shader) return; @@ -994,6 +1005,9 @@ static void si_emit_shader_vs(struct si_context *sctx) radeon_opt_set_context_reg(sctx, 
R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, shader->vgt_vertex_reuse_block_cntl); + + if (initial_cdw != sctx->gfx_cs->current.cdw) + sctx->context_roll_counter++; } /** @@ -1156,6 +1170,8 @@ static unsigned si_get_spi_shader_col_format(struct si_shader *shader) static void si_emit_shader_ps(struct si_context *sctx) { struct si_shader *shader = sctx->queued.named.ps->shader; + unsigned initial_cdw = sctx->gfx_cs->current.cdw; + if (!shader) return; @@ -1181,6 +1197,9 @@ static void si_emit_shader_ps(struct si_context *sctx) radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK, SI_TRACKED_CB_SHADER_MASK, shader->ctx_reg.ps.cb_shader_mask); + + if (initial_cdw != sctx->gfx_cs->current.cdw) + sctx->context_roll_counter++; } static void si_shader_ps(struct si_shader *shader) @@ -2849,9 +2868,13 @@ static void si_emit_spi_map(struct si_context *sctx) /* R_028644_SPI_PS_INPUT_CNTL_0 */ /* Dota 2: Only ~16% of SPI map updates set different values. */ /* Talos: Only ~9% of SPI map updates set different values. 
*/ + unsigned initial_cdw = sctx->gfx_cs->current.cdw; radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0, spi_ps_input_cntl, sctx->tracked_regs.spi_ps_input_cntl, num_interp); + + if (initial_cdw != sctx->gfx_cs->current.cdw) + sctx->context_roll_counter++; } /** diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 819c773ba8e..587422e50ca 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -258,6 +258,7 @@ static void si_emit_guardband(struct si_context *ctx) * R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ * R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */ + unsigned initial_cdw = ctx->gfx_cs->current.cdw; radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, fui(guardband_y), fui(discard_y), @@ -271,6 +272,8 @@ static void si_emit_guardband(struct si_context *ctx) S_028BE4_PIX_CENTER(rs->half_pixel_center) | S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH + vp_as_scissor.quant_mode)); + if (initial_cdw != ctx->gfx_cs->current.cdw) + ctx->context_roll_counter++; } static void si_emit_scissors(struct si_context *ctx) |