summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeonsi
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2016-01-15 14:40:19 +0100
committer Marek Olšák <[email protected]> 2016-01-22 15:02:40 +0100
commit f6360de8c02a52e29c2f6f65b94fd981ffd3851f (patch)
tree 95321993757be452773b6d653a10d085b93654c7 /src/gallium/drivers/radeonsi
parent 933e3c4145d97e73000beaf1f96db2a70f209f26 (diff)
radeonsi: use all SPI color formats
This is done because not using SPI_SHADER_32_ABGR doubles the fill rate. We should also get optimal performance if alpha isn't needed or blending isn't enabled.

Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- src/gallium/drivers/radeonsi/si_blit.c 8
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c 207
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h 5
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 23
5 files changed, 190 insertions, 57 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 75a9d56d110..a93887ec271 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
enum pipe_format format = int_to_norm_format(info->dst.format);
unsigned sample_mask = ~0;
+ /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
+ * the format is R16G16. Use R16A16, which does work.
+ */
+ if (format == PIPE_FORMAT_R16G16_UNORM)
+ format = PIPE_FORMAT_R16A16_UNORM;
+ if (format == PIPE_FORMAT_R16G16_SNORM)
+ format = PIPE_FORMAT_R16A16_SNORM;
+
if (info->src.resource->nr_samples > 1 &&
info->dst.resource->nr_samples <= 1 &&
util_max_layer(info->src.resource, 0) == 0 &&
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e2009de9870..e2725fe3679 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -126,6 +126,10 @@ struct si_framebuffer {
unsigned cb0_is_integer;
unsigned compressed_cb_mask;
unsigned spi_shader_col_format;
+ unsigned spi_shader_col_format_alpha;
+ unsigned spi_shader_col_format_blend;
+ unsigned spi_shader_col_format_blend_alpha;
+ unsigned color_is_int8; /* bitmask */
unsigned dirty_cbufs;
bool dirty_zsbuf;
};
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index af6759ef19c..a3ddee8b42c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -420,6 +420,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
S_028B70_ALPHA_TO_MASK_OFFSET3(2));
+ if (state->alpha_to_coverage)
+ blend->need_src_alpha_4bit |= 0xf;
+
blend->cb_target_mask = 0;
for (int i = 0; i < 8; i++) {
/* state->rt entries > 0 only written if independent blending */
@@ -457,6 +460,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
}
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
+
+ blend->blend_enable_4bit |= 0xf << (i * 4);
+
+ /* This is only important for formats without alpha. */
+ if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
+ blend->need_src_alpha_4bit |= 0xf << (i * 4);
}
if (blend->cb_target_mask) {
@@ -1270,53 +1284,6 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
}
}
-/* Returns the size in bits of the widest component of a CB format */
-static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
-{
- switch(colorformat) {
- case V_028C70_COLOR_4_4_4_4:
- return 4;
-
- case V_028C70_COLOR_1_5_5_5:
- case V_028C70_COLOR_5_5_5_1:
- return 5;
-
- case V_028C70_COLOR_5_6_5:
- return 6;
-
- case V_028C70_COLOR_8:
- case V_028C70_COLOR_8_8:
- case V_028C70_COLOR_8_8_8_8:
- return 8;
-
- case V_028C70_COLOR_10_10_10_2:
- case V_028C70_COLOR_2_10_10_10:
- return 10;
-
- case V_028C70_COLOR_10_11_11:
- case V_028C70_COLOR_11_11_10:
- return 11;
-
- case V_028C70_COLOR_16:
- case V_028C70_COLOR_16_16:
- case V_028C70_COLOR_16_16_16_16:
- return 16;
-
- case V_028C70_COLOR_8_24:
- case V_028C70_COLOR_24_8:
- return 24;
-
- case V_028C70_COLOR_32:
- case V_028C70_COLOR_32_32:
- case V_028C70_COLOR_32_32_32_32:
- case V_028C70_COLOR_X24_8_32_FLOAT:
- return 32;
- }
-
- assert(!"Unknown maximum component size");
- return 0;
-}
-
static uint32_t si_translate_dbformat(enum pipe_format format)
{
switch (format) {
@@ -1886,17 +1853,119 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten
static void si_choose_spi_color_formats(struct r600_surface *surf,
unsigned format, unsigned swap,
- unsigned ntype)
+ unsigned ntype, bool is_depth)
{
- unsigned max_comp_size = si_colorformat_max_comp_size(format);
+ /* Alpha is needed for alpha-to-coverage.
+ * Blending may be with or without alpha.
+ */
+ unsigned normal = 0; /* most optimal, may not support blending or export alpha */
+ unsigned alpha = 0; /* exports alpha, but may not support blending */
+ unsigned blend = 0; /* supports blending, but may not export alpha */
+ unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
- surf->spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
+ /* Choose the SPI color formats. These are required values for Stoney/RB+.
+ * Other chips have multiple choices, though they are not necessarily better.
+ */
+ switch (format) {
+ case V_028C70_COLOR_5_6_5:
+ case V_028C70_COLOR_1_5_5_5:
+ case V_028C70_COLOR_5_5_5_1:
+ case V_028C70_COLOR_4_4_4_4:
+ case V_028C70_COLOR_10_11_11:
+ case V_028C70_COLOR_11_11_10:
+ case V_028C70_COLOR_8:
+ case V_028C70_COLOR_8_8:
+ case V_028C70_COLOR_8_8_8_8:
+ case V_028C70_COLOR_10_10_10_2:
+ case V_028C70_COLOR_2_10_10_10:
+ if (ntype == V_028C70_NUMBER_UINT)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
+ else if (ntype == V_028C70_NUMBER_SINT)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
+ else
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
+ break;
+
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_16_16:
+ case V_028C70_COLOR_16_16_16_16:
+ if (ntype == V_028C70_NUMBER_UNORM ||
+ ntype == V_028C70_NUMBER_SNORM) {
+ /* UNORM16 and SNORM16 don't support blending */
+ if (ntype == V_028C70_NUMBER_UNORM)
+ normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
+ else
+ normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
+
+ /* Use 32 bits per channel for blending. */
+ if (format == V_028C70_COLOR_16) {
+ if (swap == V_028C70_SWAP_STD) { /* R */
+ blend = V_028714_SPI_SHADER_32_R;
+ blend_alpha = V_028714_SPI_SHADER_32_AR;
+ } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
+ blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
+ else
+ assert(0);
+ } else if (format == V_028C70_COLOR_16_16) {
+ if (swap == V_028C70_SWAP_STD) { /* RG */
+ blend = V_028714_SPI_SHADER_32_GR;
+ blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+ } else if (swap == V_028C70_SWAP_ALT) /* RA */
+ blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
+ else
+ assert(0);
+ } else /* 16_16_16_16 */
+ blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+ } else if (ntype == V_028C70_NUMBER_UINT)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
+ else if (ntype == V_028C70_NUMBER_SINT)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
+ else if (ntype == V_028C70_NUMBER_FLOAT)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
+ else
+ assert(0);
+ break;
- if (ntype == V_028C70_NUMBER_SRGB ||
- ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
- max_comp_size <= 10) ||
- (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16))
- surf->spi_shader_col_format = V_028714_SPI_SHADER_FP16_ABGR;
+ case V_028C70_COLOR_32:
+ if (swap == V_028C70_SWAP_STD) { /* R */
+ blend = normal = V_028714_SPI_SHADER_32_R;
+ alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
+ } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
+ else
+ assert(0);
+ break;
+
+ case V_028C70_COLOR_32_32:
+ if (swap == V_028C70_SWAP_STD) { /* RG */
+ blend = normal = V_028714_SPI_SHADER_32_GR;
+ alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+ } else if (swap == V_028C70_SWAP_ALT) /* RA */
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
+ else
+ assert(0);
+ break;
+
+ case V_028C70_COLOR_32_32_32_32:
+ case V_028C70_COLOR_8_24:
+ case V_028C70_COLOR_24_8:
+ case V_028C70_COLOR_X24_8_32_FLOAT:
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
+ break;
+
+ default:
+ assert(0);
+ return;
+ }
+
+ /* The DB->CB copy needs 32_ABGR. */
+ if (is_depth)
+ alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
+
+ surf->spi_shader_col_format = normal;
+ surf->spi_shader_col_format_alpha = alpha;
+ surf->spi_shader_col_format_blend = blend;
+ surf->spi_shader_col_format_blend_alpha = blend_alpha;
}
static void si_initialize_color_surface(struct si_context *sctx,
@@ -1989,6 +2058,12 @@ static void si_initialize_color_surface(struct si_context *sctx,
blend_bypass = 1;
}
+ if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
+ (format == V_028C70_COLOR_8 ||
+ format == V_028C70_COLOR_8_8 ||
+ format == V_028C70_COLOR_8_8_8_8))
+ surf->color_is_int8 = true;
+
color_info = S_028C70_FORMAT(format) |
S_028C70_COMP_SWAP(swap) |
S_028C70_BLEND_CLAMP(blend_clamp) |
@@ -2068,7 +2143,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
}
/* Determine pixel shader export format */
- si_choose_spi_color_formats(surf, format, swap, ntype);
+ si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
if (sctx->b.family == CHIP_STONEY &&
!(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
@@ -2296,6 +2371,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
sctx->framebuffer.spi_shader_col_format = 0;
+ sctx->framebuffer.spi_shader_col_format_alpha = 0;
+ sctx->framebuffer.spi_shader_col_format_blend = 0;
+ sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
+ sctx->framebuffer.color_is_int8 = 0;
+
sctx->framebuffer.compressed_cb_mask = 0;
sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
@@ -2318,6 +2398,15 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
sctx->framebuffer.spi_shader_col_format |=
surf->spi_shader_col_format << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_alpha |=
+ surf->spi_shader_col_format_alpha << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_blend |=
+ surf->spi_shader_col_format_blend << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_blend_alpha |=
+ surf->spi_shader_col_format_blend_alpha << (i * 4);
+
+ if (surf->color_is_int8)
+ sctx->framebuffer.color_is_int8 |= 1 << i;
if (rtex->fmask.size && rtex->cmask.size) {
sctx->framebuffer.compressed_cb_mask |= 1 << i;
@@ -2328,6 +2417,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (i == 1 && surf) {
sctx->framebuffer.spi_shader_col_format |=
surf->spi_shader_col_format << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_alpha |=
+ surf->spi_shader_col_format_alpha << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_blend |=
+ surf->spi_shader_col_format_blend << (i * 4);
+ sctx->framebuffer.spi_shader_col_format_blend_alpha |=
+ surf->spi_shader_col_format_blend_alpha << (i * 4);
}
if (state->zsbuf) {
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 46ba3c4301f..be3488e6dba 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -42,6 +42,11 @@ struct si_state_blend {
bool alpha_to_coverage;
bool alpha_to_one;
bool dual_src_blend;
+ /* Set 0xf or 0x0 (4 bits) per render target if the following is
+ * true. ANDed with spi_shader_col_format.
+ */
+ unsigned blend_enable_4bit;
+ unsigned need_src_alpha_4bit;
};
struct si_state_rasterizer {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index e08722d0d28..59aee54c3b1 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -617,7 +617,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
sel->info.colors_written == 0x1)
key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
- key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
+ if (blend) {
+ /* Select the shader color format based on whether
+ * blending or alpha are needed.
+ */
+ key->ps.spi_shader_col_format =
+ (blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_blend_alpha) |
+ (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_blend) |
+ (~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_alpha) |
+ (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format);
+ } else
+ key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
/* If alpha-to-coverage is enabled, we have to export alpha
* even if there is no color buffer.
@@ -626,6 +640,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
blend && blend->alpha_to_coverage)
key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
+ /* On SI and CIK except Hawaii, the CB doesn't clamp outputs
+ * to the range supported by the type if a channel has less
+ * than 16 bits and the export format is 16_ABGR.
+ */
+ if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
+ key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;
+
if (rs) {
bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
sctx->current_rast_prim <= PIPE_PRIM_POLYGON) ||