From 6d21fd51b6a46accb0320a8ceb52f11edc57e82b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 6 Nov 2016 20:08:24 +0100 Subject: radeonsi/gfx9: disable RB+ on Vega10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeon/r600_pipe_common.c | 4 +++- src/gallium/drivers/radeon/r600_pipe_common.h | 2 ++ src/gallium/drivers/radeon/r600_texture.c | 13 ++++++------ src/gallium/drivers/radeonsi/si_pipe.c | 13 ++++++++++++ src/gallium/drivers/radeonsi/si_state.c | 27 ++++++++++++------------- src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +- 6 files changed, 39 insertions(+), 22 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 42dc38b48af..c33b4576416 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -724,7 +724,7 @@ static const struct debug_named_value common_debug_options[] = { { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, { "nodcc", DBG_NO_DCC, "Disable DCC." }, { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." }, - { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." }, + { "norbplus", DBG_NO_RB_PLUS, "Disable RB+." }, { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." }, { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" }, { "noce", DBG_NO_CE, "Disable the constant engine"}, @@ -1317,6 +1317,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0); + rscreen->has_rbplus = false; + rscreen->rbplus_allowed = false; r600_disk_cache_create(rscreen); diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 35168844fbf..883d5edd48d 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -370,6 +370,8 @@ struct r600_common_screen { uint64_t debug_flags; bool has_cp_dma; bool has_streamout; + bool has_rbplus; /* if RB+ registers exist */ + bool rbplus_allowed; /* if RB+ is allowed */ struct disk_cache *disk_shader_cache; diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index ec7a325d1e7..29533799082 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -2475,12 +2475,13 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, !(rctx->screen->debug_flags & DBG_NO_DCC_FB)) { vi_separate_dcc_try_enable(rctx, tex); - /* Stoney can't do a CMASK-based clear, so all clears are - * considered to be hypothetically slow clears, which - * is weighed when determining to enable separate DCC. + /* RB+ isn't supported with a CMASK-based clear, so all + * clears are considered to be hypothetically slow + * clears, which is weighed when determining whether to + * enable separate DCC. */ if (tex->dcc_gather_statistics && - rctx->family == CHIP_STONEY) + rctx->screen->rbplus_allowed) tex->num_slow_clears++; } @@ -2508,8 +2509,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, continue; } - /* Stoney/RB+ doesn't work with CMASK fast clear. */ - if (rctx->family == CHIP_STONEY) + /* RB+ doesn't work with CMASK fast clear. */ + if (rctx->screen->rbplus_allowed) continue; /* ensure CMASK is enabled */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 7f0b4453088..8904b9df952 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -839,6 +839,19 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.has_cp_dma = true; sscreen->b.has_streamout = true; + + /* Some chips have RB+ registers, but don't support RB+. Those must + * always disable it. + */ + if (sscreen->b.family == CHIP_STONEY || + sscreen->b.chip_class >= GFX9) { + sscreen->b.has_rbplus = true; + + sscreen->b.rbplus_allowed = + !(sscreen->b.debug_flags & DBG_NO_RB_PLUS) && + sscreen->b.family == CHIP_STONEY; + } + (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index c62928fb469..efbd0c62caf 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -117,8 +117,8 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask); - /* STONEY-specific register settings. */ - if (sctx->b.family == CHIP_STONEY) { + /* RB+ register settings. */ + if (sctx->screen->b.rbplus_allowed) { unsigned spi_shader_col_format = sctx->ps_shader.cso ? sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0; @@ -242,16 +242,15 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a } } - if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) { - sx_ps_downconvert = 0; - sx_blend_opt_epsilon = 0; - sx_blend_opt_control = 0; - } - radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ + } else if (sctx->screen->b.has_rbplus) { + radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); + radeon_emit(cs, 0); /* R_028754_SX_PS_DOWNCONVERT */ + radeon_emit(cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */ + radeon_emit(cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */ } } @@ -483,7 +482,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, continue; } - /* Blending optimizations for Stoney. + /* Blending optimizations for RB+. * These transformations don't change the behavior. * * First, get rid of DST in the blend factors: @@ -558,7 +557,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, color_control |= S_028808_MODE(V_028808_CB_DISABLE); } - if (sctx->b.family == CHIP_STONEY) { + if (sctx->screen->b.has_rbplus) { /* Disable RB+ blend optimizations for dual source blending. * Vulkan does this. */ @@ -1197,8 +1196,8 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s if (!rs || !rs->multisample_enable) db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; - if (sctx->b.family == CHIP_STONEY && - sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) + if (sctx->screen->b.has_rbplus && + !sctx->screen->b.rbplus_allowed) db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, @@ -1968,7 +1967,7 @@ static void si_choose_spi_color_formats(struct r600_surface *surf, unsigned blend = 0; /* supports blending, but may not export alpha */ unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */ - /* Choose the SPI color formats. These are required values for Stoney/RB+. + /* Choose the SPI color formats. These are required values for RB+. * Other chips have multiple choices, though they are not necessarily better. */ switch (format) { @@ -4212,7 +4211,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); } - if (sctx->b.family == CHIP_STONEY) + if (sctx->screen->b.has_rbplus) si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0); si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index faafa329ef4..79b0bb75656 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2659,7 +2659,7 @@ bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->spi_map); } - if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps)) + if (sctx->screen->b.rbplus_allowed && si_pm4_state_changed(sctx, ps)) si_mark_atom_dirty(sctx, &sctx->cb_render_state); if (sctx->ps_db_shader_control != db_shader_control) { -- cgit v1.2.3