diff options
author | Marek Olšák <[email protected]> | 2016-10-11 23:19:46 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2016-10-13 19:00:51 +0200 |
commit | d4d9ec55c589156df4edc227a86b4a8c41048d58 (patch) | |
tree | 646cdd6806f7a311c7e8a1403d5e715a79386af7 /src/gallium/winsys | |
parent | a077185ea9d685967844b68aa09da6bd8aa430da (diff) |
radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less
memory bandwidth.
Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible
HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16.
The format promotion is not visible to state trackers.
This is part of TC-compatible renderbuffer compression, which has 3 parts:
DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now.
I don't see a measurable increase in performance though.
(I tested Talos Principle and DiRT: Showdown, the latter is improved by
0.5%, which is almost noise, and it originally used layered Z16,
so at least we know that Z16 promoted to Z32F isn't slower now)
Tested-by: Edmondo Tommasina <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/winsys')
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 57 |
1 files changed, 53 insertions, 4 deletions
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c index 8bfea457e45..1bf07a7498c 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c @@ -137,6 +137,7 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws) createFlags.value = 0; createFlags.useTileIndex = 1; createFlags.degradeBaseLevel = 1; + createFlags.useHtileSliceAlign = 1; addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND; addrCreateInput.chipFamily = ws->family; @@ -160,7 +161,9 @@ static int compute_level(struct amdgpu_winsys *ws, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut, ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn, - ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut) + ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut, + ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn, + ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut) { struct radeon_surf_level *surf_level; ADDR_E_RETURNCODE ret; @@ -257,6 +260,32 @@ static int compute_level(struct amdgpu_winsys *ws, } } + /* TC-compatible HTILE. */ + if (!is_stencil && + AddrSurfInfoIn->flags.depth && + AddrSurfInfoIn->flags.tcCompatible && + surf_level->mode == RADEON_SURF_MODE_2D && + level == 0) { + AddrHtileIn->flags.tcCompatible = 1; + AddrHtileIn->pitch = AddrSurfInfoOut->pitch; + AddrHtileIn->height = AddrSurfInfoOut->height; + AddrHtileIn->numSlices = AddrSurfInfoOut->depth; + AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8; + AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8; + AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo; + AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex; + AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; + + ret = AddrComputeHtileInfo(ws->addrlib, + AddrHtileIn, + AddrHtileOut); + + if (ret == ADDR_OK) { + surf->htile_size = AddrHtileOut->htileBytes; + surf->htile_alignment = AddrHtileOut->baseAlign; + } + } + return 0; } @@ -284,6 +313,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0}; ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0}; ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0}; + ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0}; + ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0}; ADDR_TILEINFO AddrTileInfoIn = {0}; ADDR_TILEINFO AddrTileInfoOut = {0}; int r; @@ -296,6 +327,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT); AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT); AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT); + AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT); + AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT); AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut; type = RADEON_SURF_GET(surf->flags, TYPE); @@ -361,7 +394,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP; AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0; AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0; - AddrSurfInfoIn.flags.degrade4Space = 1; + AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0; + + /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been + * requested, because TC-compatible HTILE requires 2D tiling. + */ + AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible; /* DCC notes: * - If we add MSAA support, keep in mind that CB can't decompress 8bpp @@ -443,11 +481,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, surf->bo_size = 0; surf->dcc_size = 0; surf->dcc_alignment = 1; + surf->htile_size = 0; + surf->htile_alignment = 1; /* Calculate texture layout information. */ for (level = 0; level <= surf->last_level; level++) { r = compute_level(ws, surf, false, level, type, compressed, - &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); + &AddrSurfInfoIn, &AddrSurfInfoOut, + &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut); if (r) return r; @@ -475,12 +516,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.bpp = 8; AddrSurfInfoIn.flags.depth = 0; AddrSurfInfoIn.flags.stencil = 1; + AddrSurfInfoIn.flags.tcCompatible = 0; /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */ AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split; for (level = 0; level <= surf->last_level; level++) { r = compute_level(ws, surf, true, level, type, compressed, - &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); + &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, + NULL, NULL); if (r) return r; @@ -508,6 +551,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, ws->info.num_tile_pipes); } + /* Make sure HTILE covers the whole miptree, because the shader reads + * TC-compatible HTILE even for levels where it's disabled by DB. + */ + if (surf->htile_size && surf->last_level) + surf->htile_size *= 2; + return 0; } |