From 7a62f8621ac0d0d0604f3bf1c9a492050b44d1e8 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Mon, 18 Sep 2017 11:24:10 +0200 Subject: radeonsi: allow out-of-order rasterization in commutative blending cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not enable this by default for additive blending, since it slightly breaks OpenGL invariance guarantees due to non-determinism. Still, there may be some applications that can benefit from white-listing via the radeonsi_commutative_blend_add drirc setting without any real visible artifacts. Reviewed-by: Marek Olšák Tested-by: Dieter Nützel --- src/gallium/drivers/radeonsi/driinfo_radeonsi.h | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 2 + src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state.c | 67 +++++++++++++++++++++++-- src/gallium/drivers/radeonsi/si_state.h | 1 + src/util/xmlpool/t_options.h | 5 ++ 6 files changed, 73 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h index 8be85289a0c..989e5175cc0 100644 --- a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h +++ b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h @@ -2,4 +2,5 @@ DRI_CONF_SECTION_PERFORMANCE DRI_CONF_RADEONSI_ENABLE_SISCHED("false") DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false") + DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false") DRI_CONF_SECTION_END diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index d6de1525717..372bc56ce76 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1050,6 +1050,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, !(sscreen->b.debug_flags & DBG_NO_OUT_OF_ORDER); sscreen->assume_no_z_fights = driQueryOptionb(config->options, "radeonsi_assume_no_z_fights"); + sscreen->commutative_blend_add = + driQueryOptionb(config->options, 
"radeonsi_commutative_blend_add"); sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 && sscreen->b.family <= CHIP_POLARIS12) || sscreen->b.family == CHIP_VEGA10 || diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 3d33e4f0ffa..ce6aa3be96b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -96,6 +96,7 @@ struct si_screen { bool has_draw_indirect_multi; bool has_out_of_order_rast; bool assume_no_z_fights; + bool commutative_blend_add; bool has_msaa_sample_loc_bug; bool dpbb_allowed; bool dfsm_allowed; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 66228af1d23..96f9e444977 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -377,6 +377,48 @@ static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) } } +static void si_blend_check_commutativity(struct si_screen *sscreen, + struct si_state_blend *blend, + enum pipe_blend_func func, + enum pipe_blendfactor src, + enum pipe_blendfactor dst, + unsigned chanmask) +{ + /* Src factor is allowed when it does not depend on Dst */ + static const uint32_t src_allowed = + (1u << PIPE_BLENDFACTOR_ONE) | + (1u << PIPE_BLENDFACTOR_SRC_COLOR) | + (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | + (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) | + (1u << PIPE_BLENDFACTOR_CONST_COLOR) | + (1u << PIPE_BLENDFACTOR_CONST_ALPHA) | + (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | + (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) | + (1u << PIPE_BLENDFACTOR_ZERO) | + (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) | + (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | + (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) | + (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | + (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) | + (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA); + + if (dst == PIPE_BLENDFACTOR_ONE && + (src_allowed & (1u << src))) { + /* Addition is commutative, but floating 
point addition isn't + * associative: subtle changes can be introduced via different + * rounding. + * + * Out-of-order is also non-deterministic, which means that + * this breaks OpenGL invariance requirements. So only enable + * out-of-order additive blending if explicitly allowed by a + * setting. + */ + if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN || + (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add)) + blend->commutative_4bit |= chanmask; + } +} + /** * Get rid of DST in the blend factors by commuting the operands: * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) @@ -493,6 +535,11 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, continue; } + si_blend_check_commutativity(sctx->screen, blend, + eqRGB, srcRGB, dstRGB, 0x7 << (4 * i)); + si_blend_check_commutativity(sctx->screen, blend, + eqA, srcA, dstA, 0x8 << (4 * i)); + /* Blending optimizations for RB+. * These transformations don't change the behavior. * @@ -636,6 +683,7 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state) (!old_blend || (old_blend->blend_enable_4bit != blend->blend_enable_4bit || old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit || + old_blend->commutative_4bit != blend->commutative_4bit || old_blend->logicop_enable != blend->logicop_enable))) si_mark_atom_dirty(sctx, &sctx->msaa_config); } @@ -3208,12 +3256,23 @@ static bool si_out_of_order_rasterization(struct si_context *sctx) if (!colormask) return true; - bool blend_enabled = (colormask & blend->blend_enable_4bit) != 0; + unsigned blendmask = colormask & blend->blend_enable_4bit; - if (blend_enabled) - return false; /* TODO */ + if (blendmask) { + /* Only commutative blending. 
*/ + if (blendmask & ~blend->commutative_4bit) + return false; + + if (!dsa_order_invariant.pass_set) + return false; + } + + if (colormask & ~blendmask) { + if (!dsa_order_invariant.pass_last) + return false; + } - return dsa_order_invariant.pass_last; + return true; } static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 4f14f89166d..4388ea99daf 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -55,6 +55,7 @@ struct si_state_blend { unsigned cb_target_enabled_4bit; unsigned blend_enable_4bit; unsigned need_src_alpha_4bit; + unsigned commutative_4bit; bool alpha_to_coverage:1; bool alpha_to_one:1; bool dual_src_blend:1; diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h index c92215183a5..214c7c359ee 100644 --- a/src/util/xmlpool/t_options.h +++ b/src/util/xmlpool/t_options.h @@ -443,3 +443,8 @@ DRI_CONF_OPT_END DRI_CONF_OPT_BEGIN_B(radeonsi_assume_no_z_fights, def) \ DRI_CONF_DESC(en,gettext("Assume no Z fights (enables aggressive out-of-order rasterization to improve performance; may cause rendering errors)")) \ DRI_CONF_OPT_END + +#define DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD(def) \ +DRI_CONF_OPT_BEGIN_B(radeonsi_commutative_blend_add, def) \ + DRI_CONF_DESC(en,gettext("Commutative additive blending optimizations (may cause rendering errors)")) \ +DRI_CONF_OPT_END -- cgit v1.2.3