From f6360de8c02a52e29c2f6f65b94fd981ffd3851f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 15 Jan 2016 14:40:19 +0100 Subject: radeonsi: use all SPI color formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit because not using SPI_SHADER_32_ABGR doubles fill rate. We should also get optimal performance if alpha isn't needed or blending isn't enabled. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeon/r600_pipe_common.h | 6 +- src/gallium/drivers/radeonsi/si_blit.c | 8 + src/gallium/drivers/radeonsi/si_pipe.h | 4 + src/gallium/drivers/radeonsi/si_state.c | 207 +++++++++++++++++------- src/gallium/drivers/radeonsi/si_state.h | 5 + src/gallium/drivers/radeonsi/si_state_shaders.c | 23 ++- 6 files changed, 195 insertions(+), 58 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index f3271e2ee6c..d66e74f9254 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -236,6 +236,7 @@ struct r600_surface { /* Misc. color flags. */ bool alphatest_bypass; bool export_16bpc; + bool color_is_int8; /* Color registers. */ unsigned cb_color_info; @@ -252,7 +253,10 @@ struct r600_surface { unsigned cb_color_fmask_slice; /* EG and later */ unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */ unsigned cb_color_mask; /* R600 only */ - unsigned spi_shader_col_format; /* SI+ */ + unsigned spi_shader_col_format; /* SI+, no blending, no alpha-to-coverage. */ + unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */ + unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */ + unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */ unsigned sx_ps_downconvert; /* Stoney only */ unsigned sx_blend_opt_epsilon; /* Stoney only */ struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */ diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 75a9d56d110..a93887ec271 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, enum pipe_format format = int_to_norm_format(info->dst.format); unsigned sample_mask = ~0; + /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and + * the format is R16G16. Use R16A16, which does work. + */ + if (format == PIPE_FORMAT_R16G16_UNORM) + format = PIPE_FORMAT_R16A16_UNORM; + if (format == PIPE_FORMAT_R16G16_SNORM) + format = PIPE_FORMAT_R16A16_SNORM; + if (info->src.resource->nr_samples > 1 && info->dst.resource->nr_samples <= 1 && util_max_layer(info->src.resource, 0) == 0 && diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e2009de9870..e2725fe3679 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -126,6 +126,10 @@ struct si_framebuffer { unsigned cb0_is_integer; unsigned compressed_cb_mask; unsigned spi_shader_col_format; + unsigned spi_shader_col_format_alpha; + unsigned spi_shader_col_format_blend; + unsigned spi_shader_col_format_blend_alpha; + unsigned color_is_int8; /* bitmask */ unsigned dirty_cbufs; bool dirty_zsbuf; }; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index af6759ef19c..a3ddee8b42c 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -420,6 +420,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2)); + if (state->alpha_to_coverage) + blend->need_src_alpha_4bit |= 0xf; + blend->cb_target_mask = 0; for (int i = 0; i < 8; i++) { /* state->rt entries > 0 only written if independent blending */ @@ -457,6 +460,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); } si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); + + blend->blend_enable_4bit |= 0xf << (i * 4); + + /* This is only important for formats without alpha. */ + if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) + blend->need_src_alpha_4bit |= 0xf << (i * 4); } if (blend->cb_target_mask) { @@ -1270,53 +1284,6 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat) } } -/* Returns the size in bits of the widest component of a CB format */ -static unsigned si_colorformat_max_comp_size(uint32_t colorformat) -{ - switch(colorformat) { - case V_028C70_COLOR_4_4_4_4: - return 4; - - case V_028C70_COLOR_1_5_5_5: - case V_028C70_COLOR_5_5_5_1: - return 5; - - case V_028C70_COLOR_5_6_5: - return 6; - - case V_028C70_COLOR_8: - case V_028C70_COLOR_8_8: - case V_028C70_COLOR_8_8_8_8: - return 8; - - case V_028C70_COLOR_10_10_10_2: - case V_028C70_COLOR_2_10_10_10: - return 10; - - case V_028C70_COLOR_10_11_11: - case V_028C70_COLOR_11_11_10: - return 11; - - case V_028C70_COLOR_16: - case V_028C70_COLOR_16_16: - case V_028C70_COLOR_16_16_16_16: - return 16; - - case V_028C70_COLOR_8_24: - case V_028C70_COLOR_24_8: - return 24; - - case V_028C70_COLOR_32: - case V_028C70_COLOR_32_32: - case V_028C70_COLOR_32_32_32_32: - case V_028C70_COLOR_X24_8_32_FLOAT: - return 32; - } - - assert(!"Unknown maximum component size"); - return 0; -} - static uint32_t si_translate_dbformat(enum pipe_format format) { switch (format) { @@ -1886,17 +1853,119 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten static void si_choose_spi_color_formats(struct r600_surface *surf, unsigned format, unsigned swap, - unsigned ntype) + unsigned ntype, bool is_depth) { - unsigned max_comp_size = si_colorformat_max_comp_size(format); + /* Alpha is needed for alpha-to-coverage. + * Blending may be with or without alpha. + */ + unsigned normal = 0; /* most optimal, may not support blending or export alpha */ + unsigned alpha = 0; /* exports alpha, but may not support blending */ + unsigned blend = 0; /* supports blending, but may not export alpha */ + unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */ - surf->spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR; + /* Choose the SPI color formats. These are required values for Stoney/RB+. + * Other chips have multiple choices, though they are not necessarily better. + */ + switch (format) { + case V_028C70_COLOR_5_6_5: + case V_028C70_COLOR_1_5_5_5: + case V_028C70_COLOR_5_5_5_1: + case V_028C70_COLOR_4_4_4_4: + case V_028C70_COLOR_10_11_11: + case V_028C70_COLOR_11_11_10: + case V_028C70_COLOR_8: + case V_028C70_COLOR_8_8: + case V_028C70_COLOR_8_8_8_8: + case V_028C70_COLOR_10_10_10_2: + case V_028C70_COLOR_2_10_10_10: + if (ntype == V_028C70_NUMBER_UINT) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; + else if (ntype == V_028C70_NUMBER_SINT) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; + else + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; + break; + + case V_028C70_COLOR_16: + case V_028C70_COLOR_16_16: + case V_028C70_COLOR_16_16_16_16: + if (ntype == V_028C70_NUMBER_UNORM || + ntype == V_028C70_NUMBER_SNORM) { + /* UNORM16 and SNORM16 don't support blending */ + if (ntype == V_028C70_NUMBER_UNORM) + normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR; + else + normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR; + + /* Use 32 bits per channel for blending. */ + if (format == V_028C70_COLOR_16) { + if (swap == V_028C70_SWAP_STD) { /* R */ + blend = V_028714_SPI_SHADER_32_R; + blend_alpha = V_028714_SPI_SHADER_32_AR; + } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ + blend = blend_alpha = V_028714_SPI_SHADER_32_AR; + else + assert(0); + } else if (format == V_028C70_COLOR_16_16) { + if (swap == V_028C70_SWAP_STD) { /* RG */ + blend = V_028714_SPI_SHADER_32_GR; + blend_alpha = V_028714_SPI_SHADER_32_ABGR; + } else if (swap == V_028C70_SWAP_ALT) /* RA */ + blend = blend_alpha = V_028714_SPI_SHADER_32_AR; + else + assert(0); + } else /* 16_16_16_16 */ + blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR; + } else if (ntype == V_028C70_NUMBER_UINT) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; + else if (ntype == V_028C70_NUMBER_SINT) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; + else if (ntype == V_028C70_NUMBER_FLOAT) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; + else + assert(0); + break; - if (ntype == V_028C70_NUMBER_SRGB || - ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && - max_comp_size <= 10) || - (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) - surf->spi_shader_col_format = V_028714_SPI_SHADER_FP16_ABGR; + case V_028C70_COLOR_32: + if (swap == V_028C70_SWAP_STD) { /* R */ + blend = normal = V_028714_SPI_SHADER_32_R; + alpha = blend_alpha = V_028714_SPI_SHADER_32_AR; + } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; + else + assert(0); + break; + + case V_028C70_COLOR_32_32: + if (swap == V_028C70_SWAP_STD) { /* RG */ + blend = normal = V_028714_SPI_SHADER_32_GR; + alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR; + } else if (swap == V_028C70_SWAP_ALT) /* RA */ + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; + else + assert(0); + break; + + case V_028C70_COLOR_32_32_32_32: + case V_028C70_COLOR_8_24: + case V_028C70_COLOR_24_8: + case V_028C70_COLOR_X24_8_32_FLOAT: + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; + break; + + default: + assert(0); + return; + } + + /* The DB->CB copy needs 32_ABGR. */ + if (is_depth) + alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; + + surf->spi_shader_col_format = normal; + surf->spi_shader_col_format_alpha = alpha; + surf->spi_shader_col_format_blend = blend; + surf->spi_shader_col_format_blend_alpha = blend_alpha; } static void si_initialize_color_surface(struct si_context *sctx, @@ -1989,6 +2058,12 @@ static void si_initialize_color_surface(struct si_context *sctx, blend_bypass = 1; } + if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) && + (format == V_028C70_COLOR_8 || + format == V_028C70_COLOR_8_8 || + format == V_028C70_COLOR_8_8_8_8)) + surf->color_is_int8 = true; + color_info = S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) | @@ -2068,7 +2143,7 @@ static void si_initialize_color_surface(struct si_context *sctx, } /* Determine pixel shader export format */ - si_choose_spi_color_formats(surf, format, swap, ntype); + si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth); if (sctx->b.family == CHIP_STONEY && !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) { @@ -2296,6 +2371,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&sctx->framebuffer.state, state); sctx->framebuffer.spi_shader_col_format = 0; + sctx->framebuffer.spi_shader_col_format_alpha = 0; + sctx->framebuffer.spi_shader_col_format_blend = 0; + sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; + sctx->framebuffer.color_is_int8 = 0; + sctx->framebuffer.compressed_cb_mask = 0; sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); @@ -2318,6 +2398,15 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->framebuffer.spi_shader_col_format |= surf->spi_shader_col_format << (i * 4); + sctx->framebuffer.spi_shader_col_format_alpha |= + surf->spi_shader_col_format_alpha << (i * 4); + sctx->framebuffer.spi_shader_col_format_blend |= + surf->spi_shader_col_format_blend << (i * 4); + sctx->framebuffer.spi_shader_col_format_blend_alpha |= + surf->spi_shader_col_format_blend_alpha << (i * 4); + + if (surf->color_is_int8) + sctx->framebuffer.color_is_int8 |= 1 << i; if (rtex->fmask.size && rtex->cmask.size) { sctx->framebuffer.compressed_cb_mask |= 1 << i; @@ -2328,6 +2417,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (i == 1 && surf) { sctx->framebuffer.spi_shader_col_format |= surf->spi_shader_col_format << (i * 4); + sctx->framebuffer.spi_shader_col_format_alpha |= + surf->spi_shader_col_format_alpha << (i * 4); + sctx->framebuffer.spi_shader_col_format_blend |= + surf->spi_shader_col_format_blend << (i * 4); + sctx->framebuffer.spi_shader_col_format_blend_alpha |= + surf->spi_shader_col_format_blend_alpha << (i * 4); } if (state->zsbuf) { diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 46ba3c4301f..be3488e6dba 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -42,6 +42,11 @@ struct si_state_blend { bool alpha_to_coverage; bool alpha_to_one; bool dual_src_blend; + /* Set 0xf or 0x0 (4 bits) per render target if the following is + * true. ANDed with spi_shader_col_format. + */ + unsigned blend_enable_4bit; + unsigned need_src_alpha_4bit; }; struct si_state_rasterizer { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index e08722d0d28..59aee54c3b1 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -617,7 +617,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, sel->info.colors_written == 0x1) key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; - key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format; + if (blend) { + /* Select the shader color format based on whether + * blending or alpha are needed. + */ + key->ps.spi_shader_col_format = + (blend->blend_enable_4bit & blend->need_src_alpha_4bit & + sctx->framebuffer.spi_shader_col_format_blend_alpha) | + (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & + sctx->framebuffer.spi_shader_col_format_blend) | + (~blend->blend_enable_4bit & blend->need_src_alpha_4bit & + sctx->framebuffer.spi_shader_col_format_alpha) | + (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & + sctx->framebuffer.spi_shader_col_format); + } else + key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format; /* If alpha-to-coverage is enabled, we have to export alpha * even if there is no color buffer. @@ -626,6 +640,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, blend && blend->alpha_to_coverage) key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; + /* On SI and CIK except Hawaii, the CB doesn't clamp outputs + * to the range supported by the type if a channel has less + * than 16 bits and the export format is 16_ABGR. + */ + if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII) + key->ps.color_is_int8 = sctx->framebuffer.color_is_int8; + if (rs) { bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES && sctx->current_rast_prim <= PIPE_PRIM_POLYGON) || -- cgit v1.2.3