summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2016-06-08 21:00:22 +0200
committer: Marek Olšák <[email protected]> 2016-06-14 20:22:16 +0200
commit: 4eea710b0d050275b532dbc117da97f569e5fb1e (patch)
tree: af75ecd64d16f85497d637bc825f22569040f129
parent: 373060652c889bb85d5a4673405d77ee75fb6fdc (diff)
radeonsi: try to hit direct hw MSAA resolve by changing micro mode in clear
We could also do MSAA resolve in a compute shader like Vulkan and remove these workarounds.

v2: comment the magic numbers

Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r-- src/gallium/drivers/radeon/r600_pipe_common.h 1
-rw-r--r-- src/gallium/drivers/radeon/r600_texture.c 87
-rw-r--r-- src/gallium/drivers/radeonsi/si_blit.c 20
3 files changed, 107 insertions, 1 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index edfae95ec78..57fa9e367a4 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -252,6 +252,7 @@ struct r600_texture {
uint64_t dcc_offset; /* 0 = disabled */
unsigned cb_color_info; /* fast clear enable bit */
unsigned color_clear_value[2];
+ unsigned last_msaa_resolve_target_micro_mode;
/* Depth buffer compression and fast clear. */
struct r600_htile_info htile;
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index a1c314ebce0..32347f26edd 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1012,6 +1012,8 @@ r600_texture_create_object(struct pipe_screen *screen,
* This must be done after r600_setup_surface.
* Applies to R600-Cayman. */
rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
+ /* Applies to GCN. */
+ rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
if (rtex->is_depth) {
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
@@ -1808,6 +1810,83 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
clear_value, R600_COHERENCY_CB_META);
}
+/* Set the same micro tile mode as the destination of the last MSAA resolve.
+ * This allows hitting the MSAA resolve fast path, which requires that both
+ * src and dst micro tile modes match.
+ *
+ * Does nothing when the texture is shared, is not multisampled
+ * (surface.nsamples <= 1), or already uses the recorded mode
+ * (last_msaa_resolve_target_micro_mode).
+ *
+ * Otherwise it rewrites surface.tiling_index[0], copies the recorded mode
+ * into surface.micro_tile_mode, and increments the screen's dirty_fb_counter
+ * and dirty_tex_descriptor_counter. If the recorded mode has no entry in the
+ * tables below, the function asserts and returns before changing anything.
+ *
+ * rscreen: screen that supplies chip_class and owns the dirty counters.
+ * rtex:    texture whose tiling is switched in place.
+ */
+static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
+{
+ if (rtex->resource.is_shared ||
+ rtex->surface.nsamples <= 1 ||
+ rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
+ return; /* shared, single-sampled, or already in the target mode */
+
+ assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D);
+ assert(rtex->surface.last_level == 0); /* only tiling_index[0] is rewritten below */
+
+ /* These magic numbers were copied from addrlib. It doesn't use any
+ * definitions for them either. They are all 2D_TILED_THIN1 modes with
+ * different bpp and micro tile mode.
+ *
+ * Both switches are keyed on the recorded resolve-target micro mode.
+ * With chip_class >= CIK each handled mode maps to a single index.
+ * Otherwise (SI) the index is additionally selected by surface.bpe,
+ * and only modes 0 and 1 are handled: mode 3 (rotated) falls into the
+ * asserting default there.
+ */
+ if (rscreen->chip_class >= CIK) {
+ switch (rtex->last_msaa_resolve_target_micro_mode) {
+ case 0: /* displayable */
+ rtex->surface.tiling_index[0] = 10;
+ break;
+ case 1: /* thin */
+ rtex->surface.tiling_index[0] = 14;
+ break;
+ case 3: /* rotated */
+ rtex->surface.tiling_index[0] = 28;
+ break;
+ default: /* depth, thick */
+ assert(!"unexpected micro mode");
+ return; /* leave the surface untouched */
+ }
+ } else { /* SI */
+ switch (rtex->last_msaa_resolve_target_micro_mode) {
+ case 0: /* displayable */
+ switch (rtex->surface.bpe) { /* NOTE(review): labels 8/16/32 read like bit counts - confirm the unit of surface.bpe */
+ case 8:
+ rtex->surface.tiling_index[0] = 10;
+ break;
+ case 16:
+ rtex->surface.tiling_index[0] = 11;
+ break;
+ default: /* 32, 64 */
+ rtex->surface.tiling_index[0] = 12;
+ break;
+ }
+ break;
+ case 1: /* thin */
+ switch (rtex->surface.bpe) { /* NOTE(review): same unit question as the displayable case above */
+ case 8:
+ rtex->surface.tiling_index[0] = 14;
+ break;
+ case 16:
+ rtex->surface.tiling_index[0] = 15;
+ break;
+ case 32:
+ rtex->surface.tiling_index[0] = 16;
+ break;
+ default: /* 64, 128 */
+ rtex->surface.tiling_index[0] = 17;
+ break;
+ }
+ break;
+ default: /* depth, thick */
+ assert(!"unexpected micro mode");
+ return; /* leave the surface untouched */
+ }
+ }
+
+ rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode; /* surface now matches the last resolve target */
+
+ p_atomic_inc(&rscreen->dirty_fb_counter); /* NOTE(review): presumably prompts re-emission of framebuffer state - confirm */
+ p_atomic_inc(&rscreen->dirty_tex_descriptor_counter); /* NOTE(review): presumably prompts texture descriptor updates - confirm */
+}
+
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
@@ -1881,6 +1960,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
continue;
+ /* We can change the micro tile mode before a full clear. */
+ if (rctx->screen->chip_class >= SI)
+ si_set_optimal_micro_tile_mode(rctx->screen, tex);
+
vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
vi_dcc_clear_level(rctx, tex, 0, reset_value);
@@ -1897,6 +1980,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
continue;
}
+ /* We can change the micro tile mode before a full clear. */
+ if (rctx->screen->chip_class >= SI)
+ si_set_optimal_micro_tile_mode(rctx->screen, tex);
+
/* Do the fast clear. */
rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
tex->cmask.offset, tex->cmask.size, 0,
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 9de2c755ebf..754b478432c 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -22,6 +22,7 @@
*/
#include "si_pipe.h"
+#include "sid.h"
#include "util/u_format.h"
#include "util/u_surface.h"
@@ -903,8 +904,18 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
info->src.box.height == dst_height &&
info->src.box.depth == 1 &&
dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
- src->surface.micro_tile_mode == dst->surface.micro_tile_mode &&
(!dst->cmask.size || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
+ /* Check the last constraint. */
+ if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
+ /* The next fast clear will switch to this mode to
+ * get direct hw resolve next time if the mode is
+ * different now.
+ */
+ src->last_msaa_resolve_target_micro_mode =
+ dst->surface.micro_tile_mode;
+ goto resolve_to_temp;
+ }
+
/* Resolving into a surface with DCC is unsupported. Since
* it's being overwritten anyway, clear it to uncompressed.
* This is still the fastest codepath even with this clear.
@@ -929,6 +940,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
return true;
}
+resolve_to_temp:
/* Shader-based resolve is VERY SLOW. Instead, resolve into
* a temporary texture and blit.
*/
@@ -943,6 +955,12 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
templ.flags = R600_RESOURCE_FLAG_FORCE_TILING |
R600_RESOURCE_FLAG_DISABLE_DCC;
+ /* The src and dst microtile modes must be the same. */
+ if (src->surface.micro_tile_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING)
+ templ.bind = PIPE_BIND_SCANOUT;
+ else
+ templ.bind = 0;
+
tmp = ctx->screen->resource_create(ctx->screen, &templ);
if (!tmp)
return false;