diff options
author | Marek Olšák <[email protected]> | 2019-01-04 19:19:54 -0500 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2019-04-04 09:53:24 -0400 |
commit | 2c09eb41221eb704e9e7a21654828173158d1a7d (patch) | |
tree | 6adb48d0372e6dd5562080ef7740190414abb9a6 | |
parent | 029bfa3d253ca70186e245ccf0a7e17bb40a5bab (diff) |
radeonsi: add support for displayable DCC for 1 RB chips
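This is the simpler code path: on chips with a single RB, RB and pipe alignment are simply disabled for DCC so that the display hardware can read it directly, without a retile blit.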
-rw-r--r-- | src/amd/common/ac_gpu_info.c | 2 | ||||
-rw-r--r-- | src/amd/common/ac_gpu_info.h | 3 | ||||
-rw-r--r-- | src/amd/common/ac_surface.c | 25 | ||||
-rw-r--r-- | src/amd/common/ac_surface.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_winsys.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_texture.c | 74 | ||||
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 8 |
7 files changed, 113 insertions, 7 deletions
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index fc8c6a09d2f..a6d249a6d2f 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -503,6 +503,8 @@ void ac_print_gpu_info(struct radeon_info *info) printf(" clock_crystal_freq = %i\n", info->clock_crystal_freq); printf(" tcc_cache_line_size = %u\n", info->tcc_cache_line_size); + printf(" use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned); + printf("Memory info:\n"); printf(" pte_fragment_size = %u\n", info->pte_fragment_size); printf(" gart_page_size = %u\n", info->gart_page_size); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index b1ef9c53734..99fed520618 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -56,6 +56,9 @@ struct radeon_info { uint32_t clock_crystal_freq; uint32_t tcc_cache_line_size; + /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */ + bool use_display_dcc_unaligned; + /* Memory info. 
*/ uint32_t pte_fragment_size; uint32_t gart_page_size; diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 27e63c318e6..1f43b607174 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -478,7 +478,8 @@ static bool get_display_flag(const struct ac_surf_config *config, unsigned num_channels = config->info.num_channels; unsigned bpe = surf->bpe; - if (surf->flags & RADEON_SURF_SCANOUT && + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && + surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 && surf->blk_h == 1) { /* subsampled */ @@ -1217,7 +1218,7 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned; surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned; - surf->u.gfx9.dcc_pitch_max = dout.pitch - 1; + surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1; surf->dcc_size = dout.dccRamSize; surf->dcc_alignment = dout.dccRamBaseAlign; surf->num_dcc_levels = in->numMipLevels; @@ -1453,6 +1454,19 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, AddrSurfInfoIn.flags.metaPipeUnaligned = 0; AddrSurfInfoIn.flags.metaRbUnaligned = 0; + /* The display hardware can only read DCC with RB_ALIGNED=0 and + * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED. + * + * The CB block requires RB_ALIGNED=1 except 1 RB chips. + * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes + * after rendering, so PIPE_ALIGNED=1 is recommended. + */ + if (info->use_display_dcc_unaligned && is_color_surface && + AddrSurfInfoIn.flags.display) { + AddrSurfInfoIn.flags.metaPipeUnaligned = 1; + AddrSurfInfoIn.flags.metaRbUnaligned = 1; + } + switch (mode) { case RADEON_SURF_MODE_LINEAR_ALIGNED: assert(config->info.samples <= 1); @@ -1525,6 +1539,13 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, surf->bpe * 8, &displayable); if (r) return r; + + /* Display needs unaligned DCC. 
*/ + if (info->use_display_dcc_unaligned && + surf->num_dcc_levels && + (surf->u.gfx9.dcc.pipe_aligned || + surf->u.gfx9.dcc.rb_aligned)) + displayable = false; } surf->is_displayable = displayable; diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index 7ae166c70a3..eb50c37c3c2 100644 --- a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -149,7 +149,7 @@ struct gfx9_surf_layout { /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */ uint32_t offset[RADEON_SURF_MAX_LEVELS]; - uint16_t dcc_pitch_max; /* (mip chain pitch - 1) */ + uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */ uint64_t stencil_offset; /* separate stencil */ }; diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 82feef39487..4e53c992fdf 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -217,6 +217,12 @@ struct radeon_bo_metadata { struct { /* surface flags */ unsigned swizzle_mode:5; + + /* DCC flags */ + /* [31:8]: max offset = 4GB - 256; 0 = DCC disabled */ + unsigned dcc_offset_256B:24; + unsigned dcc_pitch_max:14; /* (mip chain pitch - 1) for DCN */ + unsigned dcc_independent_64B:1; } gfx9; } u; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 581f90a7b2f..cb62f153e59 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -37,6 +37,7 @@ #include <inttypes.h> #include "state_tracker/drm_driver.h" #include "amd/common/sid.h" +#include "amd/common/gfx9d.h" static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen, @@ -351,6 +352,11 @@ static void si_get_display_metadata(struct si_screen *sscreen, metadata->u.gfx9.swizzle_mode % 4 == 2; surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode; + + if (metadata->u.gfx9.dcc_offset_256B) { + surf->u.gfx9.display_dcc_pitch_max = 
metadata->u.gfx9.dcc_pitch_max; + assert(metadata->u.gfx9.dcc_independent_64B == 1); + } } else { surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config; surf->u.legacy.bankw = metadata->u.legacy.bankw; @@ -617,6 +623,15 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, if (sscreen->info.chip_class >= GFX9) { md.u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode; + + if (tex->dcc_offset && !tex->dcc_separate_buffer) { + uint64_t dcc_offset = tex->dcc_offset; + + assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24)); + md.u.gfx9.dcc_offset_256B = dcc_offset >> 8; + md.u.gfx9.dcc_pitch_max = tex->surface.u.gfx9.display_dcc_pitch_max; + md.u.gfx9.dcc_independent_64B = 1; + } } else { md.u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; @@ -706,6 +721,23 @@ static void si_get_opaque_metadata(struct si_screen *sscreen, md->metadata[1] == si_get_bo_metadata_word1(sscreen) && G_008F28_COMPRESSION_EN(desc[6])) { tex->dcc_offset = (uint64_t)desc[7] << 8; + + if (sscreen->info.chip_class >= GFX9) { + /* Fix up parameters for displayable DCC. Some state + * trackers don't set the SCANOUT flag when importing + * displayable images, so we have to recover the correct + * parameters here. + */ + tex->surface.u.gfx9.dcc.pipe_aligned = + G_008F24_META_PIPE_ALIGNED(desc[5]); + tex->surface.u.gfx9.dcc.rb_aligned = + G_008F24_META_RB_ALIGNED(desc[5]); + + /* If DCC is unaligned, this can only be a displayable image. 
*/ + if (!tex->surface.u.gfx9.dcc.pipe_aligned && + !tex->surface.u.gfx9.dcc.rb_aligned) + tex->surface.is_displayable = true; + } return; } @@ -715,6 +747,25 @@ static void si_get_opaque_metadata(struct si_screen *sscreen, tex->dcc_offset = 0; } +static bool si_has_displayable_dcc(struct si_texture *tex) +{ + struct si_screen *sscreen = (struct si_screen*)tex->buffer.b.b.screen; + + if (sscreen->info.chip_class <= VI) + return false; + + /* This needs a cache flush before scanout. + * (it can't be scanned out and rendered to simultaneously) + */ + if (sscreen->info.use_display_dcc_unaligned && + tex->dcc_offset && + !tex->surface.u.gfx9.dcc.pipe_aligned && + !tex->surface.u.gfx9.dcc.rb_aligned) + return true; + + return false; +} + static boolean si_texture_get_handle(struct pipe_screen* screen, struct pipe_context *ctx, struct pipe_resource *resource, @@ -759,7 +810,10 @@ static boolean si_texture_get_handle(struct pipe_screen* screen, * disable it for external clients that want write * access. */ - if (usage & PIPE_HANDLE_USAGE_SHADER_WRITE && tex->dcc_offset) { + if ((usage & PIPE_HANDLE_USAGE_SHADER_WRITE && tex->dcc_offset) || + /* Displayable DCC requires an explicit flush. */ + (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && + si_has_displayable_dcc(tex))) { if (si_texture_disable_dcc(sctx, tex)) { update_metadata = true; /* si_texture_disable_dcc flushes the context */ @@ -1012,7 +1066,7 @@ void si_print_texture_info(struct si_screen *sscreen, "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n", tex->dcc_offset, tex->surface.dcc_size, tex->surface.dcc_alignment, - tex->surface.u.gfx9.dcc_pitch_max, + tex->surface.u.gfx9.display_dcc_pitch_max, tex->surface.num_dcc_levels); } @@ -1199,8 +1253,9 @@ si_texture_create_object(struct pipe_screen *screen, */ if (tex->surface.dcc_size && (buf || !(sscreen->debug_flags & DBG(NO_DCC))) && - !(tex->surface.flags & RADEON_SURF_SCANOUT)) { - /* Reserve space for the DCC buffer. 
*/ + (sscreen->info.use_display_dcc_unaligned || + !(tex->surface.flags & RADEON_SURF_SCANOUT))) { + /* Add space for the DCC buffer. */ tex->dcc_offset = align64(tex->size, tex->surface.dcc_alignment); tex->size = tex->dcc_offset + tex->surface.dcc_size; } @@ -1510,6 +1565,17 @@ static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *ssc si_get_opaque_metadata(sscreen, tex, &metadata); + /* Displayable DCC requires an explicit flush. */ + if (dedicated && + !(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && + si_has_displayable_dcc(tex)) { + /* TODO: do we need to decompress DCC? */ + if (si_texture_discard_dcc(sscreen, tex)) { + /* Update BO metadata after disabling DCC. */ + si_set_tex_bo_metadata(sscreen, tex); + } + } + assert(tex->surface.tile_swizzle == 0); return &tex->buffer.b.b; } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 58979bd4ea7..c1863057370 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -1226,6 +1226,10 @@ static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf, if (bo->ws->info.chip_class >= GFX9) { md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); + + md->u.gfx9.dcc_offset_256B = AMDGPU_TILING_GET(tiling_flags, DCC_OFFSET_256B); + md->u.gfx9.dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX); + md->u.gfx9.dcc_independent_64B = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B); } else { md->u.legacy.microtile = RADEON_LAYOUT_LINEAR; md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR; @@ -1259,6 +1263,10 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf, if (bo->ws->info.chip_class >= GFX9) { tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode); + + tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256B); + tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max); + tiling_flags |= 
AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64B); } else { if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED) tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */ |