aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Xavi Zhang <[email protected]> 2014-07-09 02:46:00 -0400
committer: Marek Olšák <[email protected]> 2017-03-30 14:44:33 +0200
commit: 3614999878fd1335e69ecb0d181a9f6d2b91e3f8 (patch)
tree: d769ba917209ada212e53ac62b3a0c36ccb923a3
parent: c12e35065af693fcad866d2089adf277a6109683 (diff)
amdgpu/addrlib: Rewrite tile mode optimization code
Note: remove reference to degrade4Space and use opt4Space instead.
-rw-r--r-- src/amd/addrlib/addrinterface.h 6
-rw-r--r-- src/amd/addrlib/core/addrcommon.h 3
-rw-r--r-- src/amd/addrlib/core/addrlib.cpp 47
-rw-r--r-- src/amd/addrlib/core/addrlib.h 2
-rw-r--r-- src/amd/addrlib/r800/egbaddrlib.cpp 16
-rw-r--r-- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c 5
-rw-r--r-- src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 12
7 files changed, 57 insertions, 34 deletions
diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h
index d05c6ef08db..a50717c1ceb 100644
--- a/src/amd/addrlib/addrinterface.h
+++ b/src/amd/addrlib/addrinterface.h
@@ -246,9 +246,8 @@ typedef union _ADDR_CREATE_FLAGS
UINT_32 useCombinedSwizzle : 1; ///< Use combined tile swizzle
UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level
UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment
- UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level
UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize
- UINT_32 reserved : 24; ///< Reserved bits for future use
+ UINT_32 reserved : 25; ///< Reserved bits for future use
};
UINT_32 value;
@@ -440,7 +439,6 @@ typedef union _ADDR_SURFACE_FLAGS
UINT_32 qbStereo : 1; ///< Quad buffer stereo surface
UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0)
UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding
- UINT_32 degrade4Space : 1; ///< Degrade base level's tile mode to save memory
UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable
UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce
UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear
@@ -448,7 +446,7 @@ typedef union _ADDR_SURFACE_FLAGS
/// This flag indicates we need to follow the alignment with
/// CZ families or other ASICs under PX configuration + CZ.
UINT_32 nonSplit : 1; ///< CI: depth texture should not be split
- UINT_32 reserved : 9; ///< Reserved bits
+ UINT_32 reserved : 10; ///< Reserved bits
};
UINT_32 value;
diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h
index f996c9a3402..88cbad0b3ba 100644
--- a/src/amd/addrlib/core/addrcommon.h
+++ b/src/amd/addrlib/core/addrcommon.h
@@ -132,9 +132,8 @@ union ADDR_CONFIG_FLAGS
UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle
UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level
UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment
- UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level
UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize
- UINT_32 reserved : 22; ///< Reserved bits for future use
+ UINT_32 reserved : 23; ///< Reserved bits for future use
};
UINT_32 value;
diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp
index 8cf4a245229..b92568ec183 100644
--- a/src/amd/addrlib/core/addrlib.cpp
+++ b/src/amd/addrlib/core/addrlib.cpp
@@ -264,7 +264,6 @@ ADDR_E_RETURNCODE AddrLib::Create(
pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle;
pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel;
pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign;
- pLib->m_configFlags.degradeBaseLevel = pCreateIn->createFlags.degradeBaseLevel;
pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile;
pLib->SetAddrChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision);
@@ -559,8 +558,8 @@ ADDR_E_RETURNCODE AddrLib::ComputeSurfaceInfo(
localIn.tileMode = tileMode;
localIn.tileType = tileType;
}
- // Degrade base level if applicable
- if (DegradeBaseLevel(&localIn, &tileMode))
+ // Optimize tile mode if possible
+ if (OptimizeTileMode(&localIn, &tileMode))
{
localIn.tileMode = tileMode;
}
@@ -3493,34 +3492,44 @@ VOID AddrLib::ComputeMipLevel(
/**
***************************************************************************************************
-* AddrLib::DegradeBaseLevel
+* AddrLib::OptimizeTileMode
*
* @brief
-* Check if base level's tile mode can be degraded
+* Check if base level's tile mode can be optimized (degraded)
* @return
* TRUE if degraded, also returns degraded tile mode (unchanged if not degraded)
***************************************************************************************************
*/
-BOOL_32 AddrLib::DegradeBaseLevel(
+BOOL_32 AddrLib::OptimizeTileMode(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure for surface info
AddrTileMode* pTileMode ///< [out] Degraded tile mode
) const
{
- BOOL_32 degraded = FALSE;
AddrTileMode tileMode = pIn->tileMode;
UINT_32 thickness = ComputeSurfaceThickness(tileMode);
- if (m_configFlags.degradeBaseLevel) // This is a global setting
+ // Optimization can only be done on level 0 and samples <= 1
+ if ((pIn->flags.opt4Space == TRUE) &&
+ (pIn->mipLevel == 0) &&
+ (pIn->numSamples <= 1) &&
+ (pIn->flags.display == FALSE) &&
+ (IsPrtTileMode(tileMode) == FALSE) &&
+ (pIn->flags.prt == FALSE))
{
- if (pIn->flags.degrade4Space && // Degradation per surface
- pIn->mipLevel == 0 &&
- pIn->numSamples == 1 &&
- IsMacroTiled(tileMode))
+ // Check if linear mode is optimal
+ if ((pIn->height == 1) &&
+ (IsLinear(tileMode) == FALSE) &&
+ (AddrElemLib::IsBlockCompressed(pIn->format) == FALSE) &&
+ (pIn->flags.depth == FALSE) &&
+ (pIn->flags.stencil == FALSE))
+ {
+ tileMode = ADDR_TM_LINEAR_ALIGNED;
+ }
+ else if (IsMacroTiled(tileMode))
{
if (HwlDegradeBaseLevel(pIn))
{
- *pTileMode = thickness == 1 ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
- degraded = TRUE;
+ tileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
}
else if (thickness > 1)
{
@@ -3534,15 +3543,19 @@ BOOL_32 AddrLib::DegradeBaseLevel(
input.tileMode = tileMode;
if (HwlDegradeBaseLevel(&input))
{
- *pTileMode = ADDR_TM_1D_TILED_THICK;
- degraded = TRUE;
+ tileMode = ADDR_TM_1D_TILED_THICK;
}
}
}
}
}
- return degraded;
+ BOOL_32 optimized = (tileMode != pIn->tileMode);
+ if (optimized)
+ {
+ *pTileMode = tileMode;
+ }
+ return optimized;
}
/**
diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h
index 43c55ff32ff..d693fd2bcbf 100644
--- a/src/amd/addrlib/core/addrlib.h
+++ b/src/amd/addrlib/core/addrlib.h
@@ -652,7 +652,7 @@ private:
VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels);
- BOOL_32 DegradeBaseLevel(
+ BOOL_32 OptimizeTileMode(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, AddrTileMode* pTileMode) const;
protected:
diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp
index abd1a79ed80..5d80906aea3 100644
--- a/src/amd/addrlib/r800/egbaddrlib.cpp
+++ b/src/amd/addrlib/r800/egbaddrlib.cpp
@@ -1158,6 +1158,22 @@ BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel(
if (valid)
{
degrade = (pIn->width < pitchAlign || pIn->height < heightAlign);
+ // Check whether 2D tiling still has too much footprint
+ if (degrade == FALSE)
+ {
+ // Only check width and height as slices are aligned to thickness
+ UINT_64 unalignedSize = pIn->width * pIn->height;
+
+ UINT_32 alignedPitch = PowTwoAlign(pIn->width, pitchAlign);
+ UINT_32 alignedHeight = PowTwoAlign(pIn->height, heightAlign);
+ UINT_64 alignedSize = alignedPitch * alignedHeight;
+
+ // alignedSize > 1.5 * unalignedSize
+ if (2 * alignedSize > 3 * unalignedSize)
+ {
+ degrade = TRUE;
+ }
+ }
}
else
{
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
index 89e84d60a3a..0433952e749 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
@@ -140,7 +140,6 @@ ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family,
createFlags.value = 0;
createFlags.useTileIndex = 1;
- createFlags.degradeBaseLevel = 1;
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
addrCreateInput.chipFamily = family;
@@ -398,7 +397,7 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
- AddrSurfInfoIn.flags.degrade4Space = 1;
+ AddrSurfInfoIn.flags.opt4Space = 1;
/* DCC notes:
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp
@@ -437,7 +436,7 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
AddrTileInfoIn.macroAspectRatio = surf->mtilea;
AddrTileInfoIn.tileSplitBytes = surf->tile_split;
AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */
- AddrSurfInfoIn.flags.degrade4Space = 0;
+ AddrSurfInfoIn.flags.opt4Space = 0;
AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
/* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index abe2b2a67af..8632f0687fc 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -124,7 +124,6 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
createFlags.value = 0;
createFlags.useTileIndex = 1;
- createFlags.degradeBaseLevel = 1;
createFlags.useHtileSliceAlign = 1;
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
@@ -401,11 +400,10 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
/* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
* requested, because TC-compatible HTILE requires 2D tiling.
*/
- AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible &&
- !AddrSurfInfoIn.flags.fmask &&
- tex->nr_samples <= 1 &&
- (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE);
- AddrSurfInfoIn.flags.opt4Space = AddrSurfInfoIn.flags.degrade4Space;
+ AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
+ !AddrSurfInfoIn.flags.fmask &&
+ tex->nr_samples <= 1 &&
+ (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE);
/* DCC notes:
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp
@@ -447,7 +445,7 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrTileInfoIn.macroAspectRatio = surf->mtilea;
AddrTileInfoIn.tileSplitBytes = surf->tile_split;
AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */
- AddrSurfInfoIn.flags.degrade4Space = 0;
+ AddrSurfInfoIn.flags.opt4Space = 0;
AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
/* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set