summaryrefslogtreecommitdiffstats
path: root/src/amd/addrlib/gfx9/gfx9addrlib.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd/addrlib/gfx9/gfx9addrlib.cpp')
-rw-r--r--src/amd/addrlib/gfx9/gfx9addrlib.cpp224
1 files changed, 159 insertions, 65 deletions
diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
index e06f13c0afe..b88d3243228 100644
--- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
@@ -189,10 +189,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
- Dim3d metaBlkDim = {8, 8, 1};
+ Dim3d metaBlkDim = {8, 8, 1};
UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
- UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
- UINT_32 heightAmp = totalAmpBits - widthAmp;
+ UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
+ UINT_32 heightAmp = totalAmpBits - widthAmp;
metaBlkDim.w <<= widthAmp;
metaBlkDim.h <<= heightAmp;
@@ -221,39 +221,42 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
&numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
- UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
+ const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
+ UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
- if (m_settings.htileAlignFix)
+ if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
{
- sizeAlign <<= 1;
+ align *= (numPipeTotal >> 1);
}
- pOut->pitch = numMetaBlkX * metaBlkDim.w;
- pOut->height = numMetaBlkY * metaBlkDim.h;
- pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
-
- pOut->metaBlkWidth = metaBlkDim.w;
- pOut->metaBlkHeight = metaBlkDim.h;
- pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
-
- pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
+ align = Max(align, metaBlkSize);
if (m_settings.metaBaseAlignFix)
{
- pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
+ align = Max(align, GetBlockSize(pIn->swizzleMode));
}
- if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
+ if (m_settings.htileAlignFix)
{
- UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
+ const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
+ const INT_32 htileCachelineSizeLog2 = 11;
+ const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
- if (additionalAlign > sizeAlign)
- {
- sizeAlign = additionalAlign;
- }
+ INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
+
+ align <<= rbMaskPadding;
}
- pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
+ pOut->pitch = numMetaBlkX * metaBlkDim.w;
+ pOut->height = numMetaBlkY * metaBlkDim.h;
+ pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
+
+ pOut->metaBlkWidth = metaBlkDim.w;
+ pOut->metaBlkHeight = metaBlkDim.h;
+ pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
+
+ pOut->baseAlign = align;
+ pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
return ADDR_OK;
}
@@ -333,17 +336,17 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
+ if (m_settings.metaBaseAlignFix)
+ {
+ sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
+ }
+
pOut->pitch = numMetaBlkX * metaBlkDim.w;
pOut->height = numMetaBlkY * metaBlkDim.h;
pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
- if (m_settings.metaBaseAlignFix)
- {
- pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
- }
-
pOut->metaBlkWidth = metaBlkDim.w;
pOut->metaBlkHeight = metaBlkDim.h;
@@ -638,16 +641,16 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
sizeAlign *= (numFrags / m_maxCompFrag);
}
+ if (m_settings.metaBaseAlignFix)
+ {
+ sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
+ }
+
pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
numCompressBlkPerMetaBlk * numFrags;
pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
- if (m_settings.metaBaseAlignFix)
- {
- pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
- }
-
pOut->pitch = numMetaBlkX * metaBlkDim.w;
pOut->height = numMetaBlkY * metaBlkDim.h;
pOut->depth = numMetaBlkZ * metaBlkDim.d;
@@ -670,21 +673,78 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
/**
************************************************************************************************************************
-* Gfx9Lib::HwlGetMaxAlignments
+* Gfx9Lib::HwlComputeMaxBaseAlignments
*
* @brief
* Gets maximum alignments
* @return
-* ADDR_E_RETURNCODE
+* maximum alignments
************************************************************************************************************************
*/
-ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
- ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure
- ) const
+UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
{
- pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
+ return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
+}
- return ADDR_OK;
+/**
+************************************************************************************************************************
+* Gfx9Lib::HwlComputeMaxMetaBaseAlignments
+*
+* @brief
+* Gets maximum alignments for metadata
+* @return
+* maximum alignments for metadata
+************************************************************************************************************************
+*/
+UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
+{
+ // Max base alignment for Htile
+ const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
+ const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
+
+ // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
+ // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
+ ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
+ const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
+
+ UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
+
+ if (maxNumPipeTotal > 2)
+ {
+ maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
+ }
+
+ maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
+
+ if (m_settings.metaBaseAlignFix)
+ {
+ maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
+ }
+
+ if (m_settings.htileAlignFix)
+ {
+ maxBaseAlignHtile *= maxNumPipeTotal;
+ }
+
+ // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
+
+ // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
+ UINT_32 maxBaseAlignDcc3D = 65536;
+
+ if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
+ {
+ maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
+ }
+
+ // Max base alignment for Msaa Dcc
+ UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
+
+ if (m_settings.metaBaseAlignFix)
+ {
+ maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
+ }
+
+ return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
}
/**
@@ -724,9 +784,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
- const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
- Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
+ MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
+ Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
+
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
@@ -798,9 +860,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
- const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
- Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
+ MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
+ Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
+
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
@@ -870,9 +934,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
- const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
- Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
+ MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
+ Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
+
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
pIn->swizzleMode);
@@ -948,10 +1014,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
- const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
- Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
- compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2});
+ MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
+ Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
+ compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
+
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
@@ -1055,6 +1123,10 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams(
break;
}
+ // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
+ // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
+ ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
+
switch (gbAddrConfig.bits.NUM_BANKS)
{
case ADDR_CONFIG_1_BANK:
@@ -1151,6 +1223,19 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams(
ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
+
+ if ((m_rbPerSeLog2 == 1) &&
+ (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
+ ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
+ {
+ ADDR_ASSERT(m_settings.isVega10 == FALSE);
+ ADDR_ASSERT(m_settings.isRaven == FALSE);
+
+ if (m_settings.isVega12)
+ {
+ m_settings.htileCacheRbConflict = 1;
+ }
+ }
}
else
{
@@ -1187,6 +1272,7 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
case FAMILY_AI:
m_settings.isArcticIsland = 1;
m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
+ m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
m_settings.isDce12 = 1;
@@ -3279,10 +3365,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
addrPreferredSwSet.value = AddrSwSetZ;
addrValidSwSet.value = AddrSwSetZ;
- if (pIn->flags.depth && pIn->flags.texture)
+ if (pIn->flags.noMetadata == FALSE)
{
- if (((bpp == 16) && (numFrags >= 4)) ||
- ((bpp == 32) && (numFrags >= 2)))
+ if (pIn->flags.depth &&
+ pIn->flags.texture &&
+ (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
{
// When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
// equation from wrong address within memory range a tile covered and use the
@@ -3290,6 +3377,16 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
pOut->canXor = FALSE;
prtXor = FALSE;
}
+
+ if (m_settings.htileCacheRbConflict &&
+ (pIn->flags.depth || pIn->flags.stencil) &&
+ (slice > 1) &&
+ (pIn->flags.metaRbUnaligned == FALSE) &&
+ (pIn->flags.metaPipeUnaligned == FALSE))
+ {
+ // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
+ pOut->canXor = FALSE;
+ }
}
}
else if (ElemLib::IsBlockCompressed(pIn->format))
@@ -3402,12 +3499,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if (pIn->bpp == 64)
{
addrPreferredSwSet.value = AddrSwSetD;
- addrValidSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
}
else
{
addrPreferredSwSet.value = AddrSwSetS;
- addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetS;
}
blockSet.micro = FALSE;
@@ -4037,7 +4134,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
(pIn->bpp >> 3) * pIn->numFrags;
pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
- pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
+ pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
if (pIn->flags.prt)
{
@@ -4762,15 +4859,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
- UINT_32 macroBlockIndex =
+ UINT_64 macroBlockIndex =
(pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
((pIn->x / localOut.blockWidth) + mipStartPos.w);
- UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) <<
- GetBlockSizeLog2(pIn->swizzleMode));
-
- pOut->addr = blockOffset | macroBlockOffset;
+ pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
}
else
{
@@ -4835,7 +4929,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
UINT_32 sliceSizeInBlock =
(localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
- UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
+ UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
pOut->addr = blockOffset | (blockIndex << log2blkSize);
}