diff options
author | Frans Gu <[email protected]> | 2016-03-04 05:04:23 -0500 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2017-03-30 14:44:33 +0200 |
commit | 6764d96eaaebc641c81cfc5666d99e8aa6ae698c (patch) | |
tree | 4d184dd088d406099bbc0c9674ab178100420532 | |
parent | ed1aca8e8f8a33f28323c16688880979d2165378 (diff) |
amdgpu/addrlib: Adjust bank equation bit order based on macro tile aspect ratio settings
By this way, we can have valid equation for 2D_THIN1 tile mode.
Add flag "preferEquation" to return equation index without adjusting
input tile mode.
-rw-r--r-- | src/amd/addrlib/addrinterface.h | 7 | ||||
-rw-r--r-- | src/amd/addrlib/r800/ciaddrlib.cpp | 80 | ||||
-rw-r--r-- | src/amd/addrlib/r800/siaddrlib.cpp | 271 | ||||
-rw-r--r-- | src/amd/addrlib/r800/siaddrlib.h | 15 |
4 files changed, 282 insertions, 91 deletions
diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h index 065545e8f01..c68cacf0952 100644 --- a/src/amd/addrlib/addrinterface.h +++ b/src/amd/addrlib/addrinterface.h @@ -146,10 +146,12 @@ typedef union _ADDR_EQUATION_KEY UINT_32 tileMode : 5; ///< Tile mode UINT_32 microTileType : 3; ///< Micro tile type UINT_32 pipeConfig : 5; ///< pipe config - UINT_32 numBanks : 5; ///< Number of banks + UINT_32 numBanksLog2 : 3; ///< Number of banks log2 UINT_32 bankWidth : 4; ///< Bank width UINT_32 bankHeight : 4; ///< Bank height UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio + UINT_32 prt : 1; ///< SI only, indicate whether this equation is for prt + UINT_32 reserved : 1; ///< Reserved bit } fields; UINT_32 value; } ADDR_EQUATION_KEY; @@ -516,7 +518,8 @@ typedef union _ADDR_SURFACE_FLAGS UINT_32 skipIndicesOutput : 1; ///< Skipping indices in output. UINT_32 rotateDisplay : 1; ///< Rotate micro tile type UINT_32 minimizeAlignment : 1; ///< Minimize alignment - UINT_32 reserved : 5; ///< Reserved bits + UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode + UINT_32 reserved : 4; ///< Reserved bits }; UINT_32 value; diff --git a/src/amd/addrlib/r800/ciaddrlib.cpp b/src/amd/addrlib/r800/ciaddrlib.cpp index cf08566f869..2c62979619a 100644 --- a/src/amd/addrlib/r800/ciaddrlib.cpp +++ b/src/amd/addrlib/r800/ciaddrlib.cpp @@ -889,48 +889,54 @@ VOID CiLib::HwlOptimizeTileMode( // Override 2D/3D macro tile mode to PRT_* tile mode if // client driver requests this surface is equation compatible - if ((pInOut->flags.needEquation == TRUE) && - (pInOut->numSamples <= 1) && - (IsMacroTiled(tileMode) == TRUE) && - (IsPrtTileMode(tileMode) == FALSE)) + if (IsMacroTiled(tileMode) == TRUE) { - UINT_32 thickness = Thickness(tileMode); - - if ((pInOut->maxBaseAlign != 0) && (pInOut->maxBaseAlign < Block64K)) - { - tileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; - } - else if (thickness == 1) + if ((pInOut->flags.needEquation == TRUE) && + (pInOut->numSamples <= 1) && + (IsPrtTileMode(tileMode) == FALSE)) { - tileMode = ADDR_TM_PRT_TILED_THIN1; - } - else - { - static const UINT_32 PrtTileBytes = 0x10000; - // First prt thick tile index in the tile mode table - static const UINT_32 PrtThickTileIndex = 22; - ADDR_TILEINFO tileInfo = {0}; - - HwlComputeMacroModeIndex(PrtThickTileIndex, - pInOut->flags, - pInOut->bpp, - pInOut->numSamples, - &tileInfo); - - UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples * - thickness * HwlGetPipes(&tileInfo) * - tileInfo.banks * tileInfo.bankWidth * - tileInfo.bankHeight; - - if (macroTileBytes <= PrtTileBytes) - { - tileMode = ADDR_TM_PRT_TILED_THICK; - } - else + if ((pInOut->numSlices > 1) && ((pInOut->maxBaseAlign == 0) || (pInOut->maxBaseAlign >= Block64K))) { - tileMode = ADDR_TM_PRT_TILED_THIN1; + UINT_32 thickness = Thickness(tileMode); + + if (thickness == 1) + { + tileMode = ADDR_TM_PRT_TILED_THIN1; + } + else + { + static const UINT_32 PrtTileBytes = 0x10000; + // First prt thick tile index in the tile mode table + static const UINT_32 PrtThickTileIndex = 22; + ADDR_TILEINFO tileInfo = {0}; + + HwlComputeMacroModeIndex(PrtThickTileIndex, + pInOut->flags, + pInOut->bpp, + pInOut->numSamples, + &tileInfo); + + UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples * + thickness * HwlGetPipes(&tileInfo) * + tileInfo.banks * tileInfo.bankWidth * + tileInfo.bankHeight; + + if (macroTileBytes <= PrtTileBytes) + { + tileMode = ADDR_TM_PRT_TILED_THICK; + } + else + { + tileMode = ADDR_TM_PRT_TILED_THIN1; + } + } } } + + if (pInOut->maxBaseAlign != 0) + { + pInOut->flags.dccCompatible = FALSE; + } } if (tileMode != pInOut->tileMode) diff --git a/src/amd/addrlib/r800/siaddrlib.cpp b/src/amd/addrlib/r800/siaddrlib.cpp index c1f6dac15a1..d3e94868dea 100644 --- a/src/amd/addrlib/r800/siaddrlib.cpp +++ b/src/amd/addrlib/r800/siaddrlib.cpp @@ -67,6 +67,43 @@ Lib* SiHwlInit(const Client* pClient) namespace V1 { +// We don't support MSAA for equation +const BOOL_32 SiLib::m_EquationSupport[SiLib::TileTableSize][SiLib::MaxNumElementBytes] = +{ + {TRUE, TRUE, TRUE, FALSE, FALSE}, // 0, non-AA compressed depth or any stencil + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 1, 2xAA/4xAA compressed depth with or without stencil + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 2, 8xAA compressed depth with or without stencil + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 3, 16 bpp depth PRT (non-MSAA), don't support uncompressed depth + {TRUE, TRUE, TRUE, FALSE, FALSE}, // 4, 1D depth + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 5, 16 bpp depth PRT (4xMSAA) + {FALSE, FALSE, TRUE, FALSE, FALSE}, // 6, 32 bpp depth PRT (non-MSAA) + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 7, 32 bpp depth PRT (4xMSAA) + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 8, Linear + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 9, 1D display + {TRUE, FALSE, FALSE, FALSE, FALSE}, // 10, 8 bpp color (displayable) + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 11, 16 bpp color (displayable) + {FALSE, FALSE, TRUE, TRUE, FALSE}, // 12, 32/64 bpp color (displayable) + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 13, 1D thin + {TRUE, FALSE, FALSE, FALSE, FALSE}, // 14, 8 bpp color non-displayable + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 15, 16 bpp color non-displayable + {FALSE, FALSE, TRUE, FALSE, FALSE}, // 16, 32 bpp color non-displayable + {FALSE, FALSE, FALSE, TRUE, TRUE }, // 17, 64/128 bpp color non-displayable + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 18, 1D THICK + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 19, 2D XTHICK + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 20, 2D THICK + {TRUE, FALSE, FALSE, FALSE, FALSE}, // 21, 8 bpp 2D PRTs (non-MSAA) + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 22, 16 bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, TRUE, FALSE, FALSE}, // 23, 32 bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, FALSE, TRUE, FALSE}, // 24, 64 bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, FALSE, FALSE, TRUE }, // 25, 128bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 26, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 27, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 28, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 29, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 30, 64bpp 2D PRTs (4xMSAA) + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 31, none +}; + /** **************************************************************************************************** * SiLib::SiLib @@ -219,37 +256,132 @@ ADDR_E_RETURNCODE SiLib::ComputeBankEquation( switch (pTileInfo->banks) { case 16: - pEquation->addr[0] = y6; - pEquation->xor1[0] = x3; - pEquation->addr[1] = y5; - pEquation->xor1[1] = y6; - pEquation->xor2[1] = x4; - pEquation->addr[2] = y4; - pEquation->xor1[2] = x5; - pEquation->addr[3] = y3; - pEquation->xor1[3] = x6; + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y6; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y5; + pEquation->xor1[1] = y6; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else if (pTileInfo->macroAspectRatio == 2) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y6; + pEquation->addr[1] = y5; + pEquation->xor1[1] = y6; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else if (pTileInfo->macroAspectRatio == 4) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y6; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = y6; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else if (pTileInfo->macroAspectRatio == 8) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y6; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = y6; + pEquation->addr[2] = x5; + pEquation->xor1[2] = y4; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else + { + ADDR_ASSERT_ALWAYS(); + } pEquation->numBits = 4; break; case 8: - pEquation->addr[0] = y5; - pEquation->xor1[0] = x3; - pEquation->addr[1] = y4; - pEquation->xor1[1] = y5; - pEquation->xor2[1] = x4; - pEquation->addr[2] = y3; - pEquation->xor1[2] = x5; + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y5; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + } + else if (pTileInfo->macroAspectRatio == 2) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y5; + pEquation->addr[1] = y4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + } + else if (pTileInfo->macroAspectRatio == 4) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y5; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y4; + pEquation->xor2[1] = y5; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + } + else + { + ADDR_ASSERT_ALWAYS(); + } pEquation->numBits = 3; break; case 4: - pEquation->addr[0] = y4; - pEquation->xor1[0] = x3; - pEquation->addr[1] = y3; - pEquation->xor1[1] = x4; + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y4; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y3; + pEquation->xor1[1] = x4; + } + else if (pTileInfo->macroAspectRatio == 2) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y4; + pEquation->addr[1] = y3; + pEquation->xor1[1] = x4; + } + else + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y4; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y3; + } pEquation->numBits = 2; break; case 2: - pEquation->addr[0] = y3; - pEquation->xor1[0] = x3; + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y3; + pEquation->xor1[0] = x3; + } + else + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y3; + } pEquation->numBits = 1; break; default: @@ -2522,11 +2654,24 @@ ADDR_E_RETURNCODE SiLib::HwlComputeSurfaceInfo( UINT_32 tileIndex = static_cast<UINT_32>(pOut->tileIndex); - if ((pIn->flags.needEquation == TRUE) && + if (((pIn->flags.needEquation == TRUE) || + (pIn->flags.preferEquation == TRUE)) && (pIn->numSamples <= 1) && (tileIndex < TileTableSize)) { - pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex]; + static const UINT_32 SiUncompressDepthTileIndex = 3; + + if ((pIn->flags.prt == FALSE) && + (m_uncompressDepthEqIndex != 0) && + (tileIndex == SiUncompressDepthTileIndex)) + { + pOut->equationIndex = m_uncompressDepthEqIndex + Log2(pIn->bpp >> 3); + } + else + { + + pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex]; + } if (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX) { @@ -3157,8 +3302,6 @@ VOID SiLib::HwlOptimizeTileMode( { UINT_32 thickness = Thickness(tileMode); - pInOut->flags.prt = TRUE; - if (thickness > 1) { tileMode = ADDR_TM_1D_TILED_THICK; @@ -3449,7 +3592,7 @@ VOID SiLib::InitEquationTable() HwlComputeMacroModeIndex(tileIndex, flags, bpp, 1, &tileConfig.info, NULL, NULL); // Check if the input is supported - if (IsEquationSupported(bpp, tileConfig, tileIndex) == TRUE) + if (IsEquationSupported(bpp, tileConfig, tileIndex, log2ElementBytes) == TRUE) { ADDR_EQUATION_KEY key = {{0}}; @@ -3461,10 +3604,12 @@ VOID SiLib::InitEquationTable() key.fields.microTileType = (tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) ? ADDR_NON_DISPLAYABLE : tileConfig.type; key.fields.pipeConfig = tileConfig.info.pipeConfig; - key.fields.numBanks = tileConfig.info.banks; + key.fields.numBanksLog2 = Log2(tileConfig.info.banks); key.fields.bankWidth = tileConfig.info.bankWidth; key.fields.bankHeight = tileConfig.info.bankHeight; key.fields.macroAspectRatio = tileConfig.info.macroAspectRatio; + key.fields.prt = ((m_chipFamily == ADDR_CHIP_FAMILY_SI) && + ((1 << tileIndex) & SiPrtTileIndexMask)) ? 1 : 0; // Find in the table if the equation has been built based on the key for (UINT_32 i = 0; i < m_numEquations; i++) @@ -3528,7 +3673,7 @@ VOID SiLib::InitEquationTable() MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / pTileInfo->macroAspectRatio; - if (m_chipFamily == ADDR_CHIP_FAMILY_SI) + if (key.fields.prt) { UINT_32 macroTileSize = m_blockWidth[equationIndex] * m_blockHeight[equationIndex] * @@ -3571,6 +3716,48 @@ VOID SiLib::InitEquationTable() // fill the invalid equation index m_equationLookupTable[log2ElementBytes][tileIndex] = equationIndex; } + + if (m_chipFamily == ADDR_CHIP_FAMILY_SI) + { + // For tile index 3 which is shared between PRT depth and uncompressed depth + m_uncompressDepthEqIndex = m_numEquations; + + for (UINT_32 log2ElemBytes = 0; log2ElemBytes < MaxNumElementBytes; log2ElemBytes++) + { + TileConfig tileConfig = m_tileTable[3]; + ADDR_EQUATION equation; + ADDR_E_RETURNCODE retCode; + + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + retCode = ComputeMacroTileEquation(log2ElemBytes, + tileConfig.mode, + tileConfig.type, + &tileConfig.info, + &equation); + + if (retCode == ADDR_OK) + { + UINT_32 equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + + m_blockSlices[equationIndex] = 1; + + const ADDR_TILEINFO* pTileInfo = &tileConfig.info; + + m_blockWidth[equationIndex] = + HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * + pTileInfo->macroAspectRatio; + m_blockHeight[equationIndex] = + MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / + pTileInfo->macroAspectRatio; + + m_equationTable[equationIndex] = equation; + + m_numEquations++; + } + } + } } } @@ -3586,9 +3773,10 @@ VOID SiLib::InitEquationTable() **************************************************************************************************** */ BOOL_32 SiLib::IsEquationSupported( - UINT_32 bpp, ///< Bits per pixel - TileConfig tileConfig, ///< Tile config - INT_32 tileIndex ///< Tile index + UINT_32 bpp, ///< Bits per pixel + TileConfig tileConfig, ///< Tile config + INT_32 tileIndex, ///< Tile index + UINT_32 elementBytesLog2 ///< Log2 of element bytes ) const { BOOL_32 supported = TRUE; @@ -3624,24 +3812,7 @@ BOOL_32 SiLib::IsEquationSupported( if ((supported == TRUE) && (m_chipFamily == ADDR_CHIP_FAMILY_SI)) { - // Please refer to SiLib::HwlSetupTileInfo for PRT tile index selecting - // Tile index 3, 6, 21-25 are for PRT single sample - if (tileIndex == 3) - { - supported = (bpp == 16); - } - else if (tileIndex == 6) - { - supported = (bpp == 32); - } - else if ((tileIndex >= 21) && (tileIndex <= 25)) - { - supported = (bpp == 8u * (1u << (static_cast<UINT_32>(tileIndex) - 21u))); - } - else - { - supported = FALSE; - } + supported = m_EquationSupport[tileIndex][elementBytesLog2]; } } diff --git a/src/amd/addrlib/r800/siaddrlib.h b/src/amd/addrlib/r800/siaddrlib.h index 6daaa2388a7..37e26ffc26e 100644 --- a/src/amd/addrlib/r800/siaddrlib.h +++ b/src/amd/addrlib/r800/siaddrlib.h @@ -261,7 +261,7 @@ protected: // Check if it is supported for given bpp and tile config to generate an equation BOOL_32 IsEquationSupported( - UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex) const; + UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex, UINT_32 elementBytesLog2) const; // Protected non-virtual functions VOID ComputeTileCoordFromPipeAndElemIdx( @@ -289,10 +289,19 @@ protected: // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) static const UINT_32 MaxNumElementBytes = 5; + + static const BOOL_32 m_EquationSupport[TileTableSize][MaxNumElementBytes]; + + // Prt tile mode index mask + static const UINT_32 SiPrtTileIndexMask = ((1 << 3) | (1 << 5) | (1 << 6) | (1 << 7) | + (1 << 21) | (1 << 22) | (1 << 23) | (1 << 24) | + (1 << 25) | (1 << 30)); + // More than half slots in tile mode table can't support equation - static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2; + static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2; // Equation table ADDR_EQUATION m_equationTable[EquationTableSize]; + UINT_32 m_numMacroBits[EquationTableSize]; UINT_32 m_blockWidth[EquationTableSize]; UINT_32 m_blockHeight[EquationTableSize]; UINT_32 m_blockSlices[EquationTableSize]; @@ -301,6 +310,8 @@ protected: // Equation lookup table according to bpp and tile index UINT_32 m_equationLookupTable[MaxNumElementBytes][TileTableSize]; + UINT_32 m_uncompressDepthEqIndex; + private: VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const; |