diff options
Diffstat (limited to 'src/amd/addrlib/gfx9')
-rw-r--r-- | src/amd/addrlib/gfx9/gfx9addrlib.cpp | 224 | ||||
-rw-r--r-- | src/amd/addrlib/gfx9/gfx9addrlib.h | 21 |
2 files changed, 169 insertions, 76 deletions
diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp index e06f13c0afe..b88d3243228 100644 --- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp +++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp @@ -189,10 +189,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; - Dim3d metaBlkDim = {8, 8, 1}; + Dim3d metaBlkDim = {8, 8, 1}; UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2; - UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits); - UINT_32 heightAmp = totalAmpBits - widthAmp; + UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits); + UINT_32 heightAmp = totalAmpBits - widthAmp; metaBlkDim.w <<= widthAmp; metaBlkDim.h <<= heightAmp; @@ -221,39 +221,42 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices, &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); - UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2; + UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; - if (m_settings.htileAlignFix) + if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2)) { - sizeAlign <<= 1; + align *= (numPipeTotal >> 1); } - pOut->pitch = numMetaBlkX * metaBlkDim.w; - pOut->height = numMetaBlkY * metaBlkDim.h; - pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4; - - pOut->metaBlkWidth = metaBlkDim.w; - pOut->metaBlkHeight = metaBlkDim.h; - pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; - - pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign); + align = Max(align, metaBlkSize); if (m_settings.metaBaseAlignFix) { - pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode)); + align = Max(align, GetBlockSize(pIn->swizzleMode)); } - if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2)) + if (m_settings.htileAlignFix) { - 
UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2; + const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2; + const INT_32 htileCachelineSizeLog2 = 11; + const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal); - if (additionalAlign > sizeAlign) - { - sizeAlign = additionalAlign; - } + INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits)); + + align <<= rbMaskPadding; } - pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign); + pOut->pitch = numMetaBlkX * metaBlkDim.w; + pOut->height = numMetaBlkY * metaBlkDim.h; + pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize; + + pOut->metaBlkWidth = metaBlkDim.w; + pOut->metaBlkHeight = metaBlkDim.h; + pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; + + pOut->baseAlign = align; + pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align); return ADDR_OK; } @@ -333,17 +336,17 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo( UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + if (m_settings.metaBaseAlignFix) + { + sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); + } + pOut->pitch = numMetaBlkX * metaBlkDim.w; pOut->height = numMetaBlkY * metaBlkDim.h; pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1; pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign); pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign); - if (m_settings.metaBaseAlignFix) - { - pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode)); - } - pOut->metaBlkWidth = metaBlkDim.w; pOut->metaBlkHeight = metaBlkDim.h; @@ -638,16 +641,16 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( sizeAlign *= (numFrags / m_maxCompFrag); } + if (m_settings.metaBaseAlignFix) + { + sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); + } + pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ * numCompressBlkPerMetaBlk * numFrags; 
pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign); pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign); - if (m_settings.metaBaseAlignFix) - { - pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode)); - } - pOut->pitch = numMetaBlkX * metaBlkDim.w; pOut->height = numMetaBlkY * metaBlkDim.h; pOut->depth = numMetaBlkZ * metaBlkDim.d; @@ -670,21 +673,78 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( /** ************************************************************************************************************************ -* Gfx9Lib::HwlGetMaxAlignments +* Gfx9Lib::HwlComputeMaxBaseAlignments * * @brief * Gets maximum alignments * @return -* ADDR_E_RETURNCODE +* maximum alignments ************************************************************************************************************************ */ -ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure - ) const +UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const { - pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB); + return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB); +} - return ADDR_OK; +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const +{ + // Max base alignment for Htile + const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z); + const UINT_32 maxNumRbTotal = m_se * m_rbPerSe; + + // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2), + // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simplify 
the logic. + ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u)); + const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u); + + UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes; + + if (maxNumPipeTotal > 2) + { + maxBaseAlignHtile *= (maxNumPipeTotal >> 1); + } + + maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile); + + if (m_settings.metaBaseAlignFix) + { + maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB)); + } + + if (m_settings.htileAlignFix) + { + maxBaseAlignHtile *= maxNumPipeTotal; + } + + // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate + + // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate + UINT_32 maxBaseAlignDcc3D = 65536; + + if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1)) + { + maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u); + } + + // Max base alignment for Msaa Dcc + UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag); + + if (m_settings.metaBaseAlignFix) + { + maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB)); + } + + return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D)); } /** @@ -724,9 +784,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord( UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, - Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); + MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, + Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 xb = 
pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; @@ -798,9 +860,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); + MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 xb = pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; @@ -870,9 +934,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); + MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, pIn->swizzleMode); @@ -948,10 +1014,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); - const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, - Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, - compBlkWidthLog2, 
compBlkHeightLog2, compBlkDepthLog2}); + MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, + Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, + compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); UINT_32 xb = pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; @@ -1055,6 +1123,10 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams( break; } + // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, + // and any larger value requires a post-process (left shift) on the output pipeBankXor bits. + ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B); + switch (gbAddrConfig.bits.NUM_BANKS) { case ADDR_CONFIG_1_BANK: @@ -1151,6 +1223,19 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams( ADDR_ASSERT((m_blockVarSizeLog2 == 0) || ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u))); m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u); + + if ((m_rbPerSeLog2 == 1) && + (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) || + ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2))))) + { + ADDR_ASSERT(m_settings.isVega10 == FALSE); + ADDR_ASSERT(m_settings.isRaven == FALSE); + + if (m_settings.isVega12) + { + m_settings.htileCacheRbConflict = 1; + } + } } else { @@ -1187,6 +1272,7 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily( case FAMILY_AI: m_settings.isArcticIsland = 1; m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision); + m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision); m_settings.isDce12 = 1; @@ -3279,10 +3365,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( addrPreferredSwSet.value = AddrSwSetZ; addrValidSwSet.value = AddrSwSetZ; - if (pIn->flags.depth && pIn->flags.texture) + if (pIn->flags.noMetadata == FALSE) { - if (((bpp == 16) && (numFrags >= 4)) || - ((bpp == 32) && (numFrags 
>= 2))) + if (pIn->flags.depth && + pIn->flags.texture && + (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2)))) { // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane // equation from wrong address within memory range a tile covered and use the @@ -3290,6 +3377,16 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( pOut->canXor = FALSE; prtXor = FALSE; } + + if (m_settings.htileCacheRbConflict && + (pIn->flags.depth || pIn->flags.stencil) && + (slice > 1) && + (pIn->flags.metaRbUnaligned == FALSE) && + (pIn->flags.metaPipeUnaligned == FALSE)) + { + // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency + pOut->canXor = FALSE; + } } } else if (ElemLib::IsBlockCompressed(pIn->format)) @@ -3402,12 +3499,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( if (pIn->bpp == 64) { addrPreferredSwSet.value = AddrSwSetD; - addrValidSwSet.value = AddrSwSetD; + addrValidSwSet.value = AddrSwSetS | AddrSwSetD; } else { addrPreferredSwSet.value = AddrSwSetS; - addrValidSwSet.value = AddrSwSetS | AddrSwSetD; + addrValidSwSet.value = AddrSwSetS; } blockSet.micro = FALSE; @@ -4037,7 +4134,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight * (pIn->bpp >> 3) * pIn->numFrags; pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice; - pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode); + pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode); if (pIn->flags.prt) { @@ -4762,15 +4859,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth; UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight; UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock; - UINT_32 macroBlockIndex = + UINT_64 macroBlockIndex = (pIn->slice + mipStartPos.d) * 
sliceSizeInMacroBlock + ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock + ((pIn->x / localOut.blockWidth) + mipStartPos.w); - UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) << - GetBlockSizeLog2(pIn->swizzleMode)); - - pOut->addr = blockOffset | macroBlockOffset; + pOut->addr = blockOffset | (macroBlockIndex << log2blkSize); } else { @@ -4835,7 +4929,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth; UINT_32 sliceSizeInBlock = (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; pOut->addr = blockOffset | (blockIndex << log2blkSize); } diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.h b/src/amd/addrlib/gfx9/gfx9addrlib.h index 1f233a4ff91..7c61a40880e 100644 --- a/src/amd/addrlib/gfx9/gfx9addrlib.h +++ b/src/amd/addrlib/gfx9/gfx9addrlib.h @@ -55,19 +55,19 @@ struct Gfx9ChipSettings UINT_32 isArcticIsland : 1; UINT_32 isVega10 : 1; UINT_32 isRaven : 1; - UINT_32 reserved0 : 29; + UINT_32 isVega12 : 1; // Display engine IP version name UINT_32 isDce12 : 1; UINT_32 isDcn1 : 1; - UINT_32 reserved1 : 29; // Misc configuration bits UINT_32 metaBaseAlignFix : 1; UINT_32 depthPipeXorDisable : 1; UINT_32 htileAlignFix : 1; UINT_32 applyAliasFix : 1; - UINT_32 reserved2 : 28; + UINT_32 htileCacheRbConflict: 1; + UINT_32 reserved2 : 27; }; }; @@ -121,9 +121,6 @@ public: return (pMem != NULL) ? 
new (pMem) Gfx9Lib(pClient) : NULL; } - virtual BOOL_32 IsValidDisplaySwizzleMode( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - protected: Gfx9Lib(const Client* pClient); virtual ~Gfx9Lib(); @@ -224,7 +221,7 @@ protected: AddrSwizzleMode swMode, UINT_32 elementBytesLog2) const; - virtual UINT_32 HwlComputeSurfaceBaseAlign(AddrSwizzleMode swizzleMode) const + UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const { UINT_32 baseAlign; @@ -400,11 +397,11 @@ protected: static const UINT_32 MaxCachedMetaEq = 2; private: - virtual ADDR_E_RETURNCODE HwlGetMaxAlignments( - ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const; + virtual UINT_32 HwlComputeMaxBaseAlignments() const; - virtual BOOL_32 HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn); + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; + + virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn); VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const; @@ -434,6 +431,8 @@ private: UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth, UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const; + BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + ADDR_E_RETURNCODE ComputeSurfaceLinearPadding( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32* pMipmap0PaddedWidth, |