summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrans Gu <[email protected]>2016-03-04 05:04:23 -0500
committerMarek Olšák <[email protected]>2017-03-30 14:44:33 +0200
commit6764d96eaaebc641c81cfc5666d99e8aa6ae698c (patch)
tree4d184dd088d406099bbc0c9674ab178100420532
parented1aca8e8f8a33f28323c16688880979d2165378 (diff)
amdgpu/addrlib: Adjust bank equation bit order based on macro tile aspect ratio settings
By this way, we can have valid equation for 2D_THIN1 tile mode. Add flag "preferEquation" to return equation index without adjusting input tile mode.
-rw-r--r--src/amd/addrlib/addrinterface.h7
-rw-r--r--src/amd/addrlib/r800/ciaddrlib.cpp80
-rw-r--r--src/amd/addrlib/r800/siaddrlib.cpp271
-rw-r--r--src/amd/addrlib/r800/siaddrlib.h15
4 files changed, 282 insertions, 91 deletions
diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h
index 065545e8f01..c68cacf0952 100644
--- a/src/amd/addrlib/addrinterface.h
+++ b/src/amd/addrlib/addrinterface.h
@@ -146,10 +146,12 @@ typedef union _ADDR_EQUATION_KEY
UINT_32 tileMode : 5; ///< Tile mode
UINT_32 microTileType : 3; ///< Micro tile type
UINT_32 pipeConfig : 5; ///< pipe config
- UINT_32 numBanks : 5; ///< Number of banks
+ UINT_32 numBanksLog2 : 3; ///< Number of banks log2
UINT_32 bankWidth : 4; ///< Bank width
UINT_32 bankHeight : 4; ///< Bank height
UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio
+ UINT_32 prt : 1; ///< SI only, indicate whether this equation is for prt
+ UINT_32 reserved : 1; ///< Reserved bit
} fields;
UINT_32 value;
} ADDR_EQUATION_KEY;
@@ -516,7 +518,8 @@ typedef union _ADDR_SURFACE_FLAGS
UINT_32 skipIndicesOutput : 1; ///< Skipping indices in output.
UINT_32 rotateDisplay : 1; ///< Rotate micro tile type
UINT_32 minimizeAlignment : 1; ///< Minimize alignment
- UINT_32 reserved : 5; ///< Reserved bits
+ UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode
+ UINT_32 reserved : 4; ///< Reserved bits
};
UINT_32 value;
diff --git a/src/amd/addrlib/r800/ciaddrlib.cpp b/src/amd/addrlib/r800/ciaddrlib.cpp
index cf08566f869..2c62979619a 100644
--- a/src/amd/addrlib/r800/ciaddrlib.cpp
+++ b/src/amd/addrlib/r800/ciaddrlib.cpp
@@ -889,48 +889,54 @@ VOID CiLib::HwlOptimizeTileMode(
// Override 2D/3D macro tile mode to PRT_* tile mode if
// client driver requests this surface is equation compatible
- if ((pInOut->flags.needEquation == TRUE) &&
- (pInOut->numSamples <= 1) &&
- (IsMacroTiled(tileMode) == TRUE) &&
- (IsPrtTileMode(tileMode) == FALSE))
+ if (IsMacroTiled(tileMode) == TRUE)
{
- UINT_32 thickness = Thickness(tileMode);
-
- if ((pInOut->maxBaseAlign != 0) && (pInOut->maxBaseAlign < Block64K))
- {
- tileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
- }
- else if (thickness == 1)
+ if ((pInOut->flags.needEquation == TRUE) &&
+ (pInOut->numSamples <= 1) &&
+ (IsPrtTileMode(tileMode) == FALSE))
{
- tileMode = ADDR_TM_PRT_TILED_THIN1;
- }
- else
- {
- static const UINT_32 PrtTileBytes = 0x10000;
- // First prt thick tile index in the tile mode table
- static const UINT_32 PrtThickTileIndex = 22;
- ADDR_TILEINFO tileInfo = {0};
-
- HwlComputeMacroModeIndex(PrtThickTileIndex,
- pInOut->flags,
- pInOut->bpp,
- pInOut->numSamples,
- &tileInfo);
-
- UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples *
- thickness * HwlGetPipes(&tileInfo) *
- tileInfo.banks * tileInfo.bankWidth *
- tileInfo.bankHeight;
-
- if (macroTileBytes <= PrtTileBytes)
- {
- tileMode = ADDR_TM_PRT_TILED_THICK;
- }
- else
+ if ((pInOut->numSlices > 1) && ((pInOut->maxBaseAlign == 0) || (pInOut->maxBaseAlign >= Block64K)))
{
- tileMode = ADDR_TM_PRT_TILED_THIN1;
+ UINT_32 thickness = Thickness(tileMode);
+
+ if (thickness == 1)
+ {
+ tileMode = ADDR_TM_PRT_TILED_THIN1;
+ }
+ else
+ {
+ static const UINT_32 PrtTileBytes = 0x10000;
+ // First prt thick tile index in the tile mode table
+ static const UINT_32 PrtThickTileIndex = 22;
+ ADDR_TILEINFO tileInfo = {0};
+
+ HwlComputeMacroModeIndex(PrtThickTileIndex,
+ pInOut->flags,
+ pInOut->bpp,
+ pInOut->numSamples,
+ &tileInfo);
+
+ UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples *
+ thickness * HwlGetPipes(&tileInfo) *
+ tileInfo.banks * tileInfo.bankWidth *
+ tileInfo.bankHeight;
+
+ if (macroTileBytes <= PrtTileBytes)
+ {
+ tileMode = ADDR_TM_PRT_TILED_THICK;
+ }
+ else
+ {
+ tileMode = ADDR_TM_PRT_TILED_THIN1;
+ }
+ }
}
}
+
+ if (pInOut->maxBaseAlign != 0)
+ {
+ pInOut->flags.dccCompatible = FALSE;
+ }
}
if (tileMode != pInOut->tileMode)
diff --git a/src/amd/addrlib/r800/siaddrlib.cpp b/src/amd/addrlib/r800/siaddrlib.cpp
index c1f6dac15a1..d3e94868dea 100644
--- a/src/amd/addrlib/r800/siaddrlib.cpp
+++ b/src/amd/addrlib/r800/siaddrlib.cpp
@@ -67,6 +67,43 @@ Lib* SiHwlInit(const Client* pClient)
namespace V1
{
+// We don't support MSAA for equation
+const BOOL_32 SiLib::m_EquationSupport[SiLib::TileTableSize][SiLib::MaxNumElementBytes] =
+{
+ {TRUE, TRUE, TRUE, FALSE, FALSE}, // 0, non-AA compressed depth or any stencil
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 1, 2xAA/4xAA compressed depth with or without stencil
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 2, 8xAA compressed depth with or without stencil
+ {FALSE, TRUE, FALSE, FALSE, FALSE}, // 3, 16 bpp depth PRT (non-MSAA), don't support uncompressed depth
+ {TRUE, TRUE, TRUE, FALSE, FALSE}, // 4, 1D depth
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 5, 16 bpp depth PRT (4xMSAA)
+ {FALSE, FALSE, TRUE, FALSE, FALSE}, // 6, 32 bpp depth PRT (non-MSAA)
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 7, 32 bpp depth PRT (4xMSAA)
+ {TRUE, TRUE, TRUE, TRUE, TRUE }, // 8, Linear
+ {TRUE, TRUE, TRUE, TRUE, TRUE }, // 9, 1D display
+ {TRUE, FALSE, FALSE, FALSE, FALSE}, // 10, 8 bpp color (displayable)
+ {FALSE, TRUE, FALSE, FALSE, FALSE}, // 11, 16 bpp color (displayable)
+ {FALSE, FALSE, TRUE, TRUE, FALSE}, // 12, 32/64 bpp color (displayable)
+ {TRUE, TRUE, TRUE, TRUE, TRUE }, // 13, 1D thin
+ {TRUE, FALSE, FALSE, FALSE, FALSE}, // 14, 8 bpp color non-displayable
+ {FALSE, TRUE, FALSE, FALSE, FALSE}, // 15, 16 bpp color non-displayable
+ {FALSE, FALSE, TRUE, FALSE, FALSE}, // 16, 32 bpp color non-displayable
+ {FALSE, FALSE, FALSE, TRUE, TRUE }, // 17, 64/128 bpp color non-displayable
+ {TRUE, TRUE, TRUE, TRUE, TRUE }, // 18, 1D THICK
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 19, 2D XTHICK
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 20, 2D THICK
+ {TRUE, FALSE, FALSE, FALSE, FALSE}, // 21, 8 bpp 2D PRTs (non-MSAA)
+ {FALSE, TRUE, FALSE, FALSE, FALSE}, // 22, 16 bpp 2D PRTs (non-MSAA)
+ {FALSE, FALSE, TRUE, FALSE, FALSE}, // 23, 32 bpp 2D PRTs (non-MSAA)
+ {FALSE, FALSE, FALSE, TRUE, FALSE}, // 24, 64 bpp 2D PRTs (non-MSAA)
+ {FALSE, FALSE, FALSE, FALSE, TRUE }, // 25, 128bpp 2D PRTs (non-MSAA)
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 26, none
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 27, none
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 28, none
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 29, none
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 30, 64bpp 2D PRTs (4xMSAA)
+ {FALSE, FALSE, FALSE, FALSE, FALSE}, // 31, none
+};
+
/**
****************************************************************************************************
* SiLib::SiLib
@@ -219,37 +256,132 @@ ADDR_E_RETURNCODE SiLib::ComputeBankEquation(
switch (pTileInfo->banks)
{
case 16:
- pEquation->addr[0] = y6;
- pEquation->xor1[0] = x3;
- pEquation->addr[1] = y5;
- pEquation->xor1[1] = y6;
- pEquation->xor2[1] = x4;
- pEquation->addr[2] = y4;
- pEquation->xor1[2] = x5;
- pEquation->addr[3] = y3;
- pEquation->xor1[3] = x6;
+ if (pTileInfo->macroAspectRatio == 1)
+ {
+ pEquation->addr[0] = y6;
+ pEquation->xor1[0] = x3;
+ pEquation->addr[1] = y5;
+ pEquation->xor1[1] = y6;
+ pEquation->xor2[1] = x4;
+ pEquation->addr[2] = y4;
+ pEquation->xor1[2] = x5;
+ pEquation->addr[3] = y3;
+ pEquation->xor1[3] = x6;
+ }
+ else if (pTileInfo->macroAspectRatio == 2)
+ {
+ pEquation->addr[0] = x3;
+ pEquation->xor1[0] = y6;
+ pEquation->addr[1] = y5;
+ pEquation->xor1[1] = y6;
+ pEquation->xor2[1] = x4;
+ pEquation->addr[2] = y4;
+ pEquation->xor1[2] = x5;
+ pEquation->addr[3] = y3;
+ pEquation->xor1[3] = x6;
+ }
+ else if (pTileInfo->macroAspectRatio == 4)
+ {
+ pEquation->addr[0] = x3;
+ pEquation->xor1[0] = y6;
+ pEquation->addr[1] = x4;
+ pEquation->xor1[1] = y5;
+ pEquation->xor2[1] = y6;
+ pEquation->addr[2] = y4;
+ pEquation->xor1[2] = x5;
+ pEquation->addr[3] = y3;
+ pEquation->xor1[3] = x6;
+ }
+ else if (pTileInfo->macroAspectRatio == 8)
+ {
+ pEquation->addr[0] = x3;
+ pEquation->xor1[0] = y6;
+ pEquation->addr[1] = x4;
+ pEquation->xor1[1] = y5;
+ pEquation->xor2[1] = y6;
+ pEquation->addr[2] = x5;
+ pEquation->xor1[2] = y4;
+ pEquation->addr[3] = y3;
+ pEquation->xor1[3] = x6;
+ }
+ else
+ {
+ ADDR_ASSERT_ALWAYS();
+ }
pEquation->numBits = 4;
break;
case 8:
- pEquation->addr[0] = y5;
- pEquation->xor1[0] = x3;
- pEquation->addr[1] = y4;
- pEquation->xor1[1] = y5;
- pEquation->xor2[1] = x4;
- pEquation->addr[2] = y3;
- pEquation->xor1[2] = x5;
+ if (pTileInfo->macroAspectRatio == 1)
+ {
+ pEquation->addr[0] = y5;
+ pEquation->xor1[0] = x3;
+ pEquation->addr[1] = y4;
+ pEquation->xor1[1] = y5;
+ pEquation->xor2[1] = x4;
+ pEquation->addr[2] = y3;
+ pEquation->xor1[2] = x5;
+ }
+ else if (pTileInfo->macroAspectRatio == 2)
+ {
+ pEquation->addr[0] = x3;
+ pEquation->xor1[0] = y5;
+ pEquation->addr[1] = y4;
+ pEquation->xor1[1] = y5;
+ pEquation->xor2[1] = x4;
+ pEquation->addr[2] = y3;
+ pEquation->xor1[2] = x5;
+ }
+ else if (pTileInfo->macroAspectRatio == 4)
+ {
+ pEquation->addr[0] = x3;
+ pEquation->xor1[0] = y5;
+ pEquation->addr[1] = x4;
+ pEquation->xor1[1] = y4;
+ pEquation->xor2[1] = y5;
+ pEquation->addr[2] = y3;
+ pEquation->xor1[2] = x5;
+ }
+ else
+ {
+ ADDR_ASSERT_ALWAYS();
+ }
pEquation->numBits = 3;
break;
case 4:
- pEquation->addr[0] = y4;
- pEquation->xor1[0] = x3;
- pEquation->addr[1] = y3;
- pEquation->xor1[1] = x4;
+ if (pTileInfo->macroAspectRatio == 1)
+ {
+ pEquation->addr[0] = y4;
+ pEquation->xor1[0] = x3;
+ pEquation->addr[1] = y3;
+ pEquation->xor1[1] = x4;
+ }
+ else if (pTileInfo->macroAspectRatio == 2)
+ {
+ pEquation->addr[0] = x3;
+ pEquation->xor1[0] = y4;
+ pEquation->addr[1] = y3;
+ pEquation->xor1[1] = x4;
+ }
+ else
+ {
+ pEquation->addr[0] = x3;
+ pEquation->xor1[0] = y4;
+ pEquation->addr[1] = x4;
+ pEquation->xor1[1] = y3;
+ }
pEquation->numBits = 2;
break;
case 2:
- pEquation->addr[0] = y3;
- pEquation->xor1[0] = x3;
+ if (pTileInfo->macroAspectRatio == 1)
+ {
+ pEquation->addr[0] = y3;
+ pEquation->xor1[0] = x3;
+ }
+ else
+ {
+ pEquation->addr[0] = x3;
+ pEquation->xor1[0] = y3;
+ }
pEquation->numBits = 1;
break;
default:
@@ -2522,11 +2654,24 @@ ADDR_E_RETURNCODE SiLib::HwlComputeSurfaceInfo(
UINT_32 tileIndex = static_cast<UINT_32>(pOut->tileIndex);
- if ((pIn->flags.needEquation == TRUE) &&
+ if (((pIn->flags.needEquation == TRUE) ||
+ (pIn->flags.preferEquation == TRUE)) &&
(pIn->numSamples <= 1) &&
(tileIndex < TileTableSize))
{
- pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex];
+ static const UINT_32 SiUncompressDepthTileIndex = 3;
+
+ if ((pIn->flags.prt == FALSE) &&
+ (m_uncompressDepthEqIndex != 0) &&
+ (tileIndex == SiUncompressDepthTileIndex))
+ {
+ pOut->equationIndex = m_uncompressDepthEqIndex + Log2(pIn->bpp >> 3);
+ }
+ else
+ {
+
+ pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex];
+ }
if (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX)
{
@@ -3157,8 +3302,6 @@ VOID SiLib::HwlOptimizeTileMode(
{
UINT_32 thickness = Thickness(tileMode);
- pInOut->flags.prt = TRUE;
-
if (thickness > 1)
{
tileMode = ADDR_TM_1D_TILED_THICK;
@@ -3449,7 +3592,7 @@ VOID SiLib::InitEquationTable()
HwlComputeMacroModeIndex(tileIndex, flags, bpp, 1, &tileConfig.info, NULL, NULL);
// Check if the input is supported
- if (IsEquationSupported(bpp, tileConfig, tileIndex) == TRUE)
+ if (IsEquationSupported(bpp, tileConfig, tileIndex, log2ElementBytes) == TRUE)
{
ADDR_EQUATION_KEY key = {{0}};
@@ -3461,10 +3604,12 @@ VOID SiLib::InitEquationTable()
key.fields.microTileType = (tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) ?
ADDR_NON_DISPLAYABLE : tileConfig.type;
key.fields.pipeConfig = tileConfig.info.pipeConfig;
- key.fields.numBanks = tileConfig.info.banks;
+ key.fields.numBanksLog2 = Log2(tileConfig.info.banks);
key.fields.bankWidth = tileConfig.info.bankWidth;
key.fields.bankHeight = tileConfig.info.bankHeight;
key.fields.macroAspectRatio = tileConfig.info.macroAspectRatio;
+ key.fields.prt = ((m_chipFamily == ADDR_CHIP_FAMILY_SI) &&
+ ((1 << tileIndex) & SiPrtTileIndexMask)) ? 1 : 0;
// Find in the table if the equation has been built based on the key
for (UINT_32 i = 0; i < m_numEquations; i++)
@@ -3528,7 +3673,7 @@ VOID SiLib::InitEquationTable()
MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks /
pTileInfo->macroAspectRatio;
- if (m_chipFamily == ADDR_CHIP_FAMILY_SI)
+ if (key.fields.prt)
{
UINT_32 macroTileSize =
m_blockWidth[equationIndex] * m_blockHeight[equationIndex] *
@@ -3571,6 +3716,48 @@ VOID SiLib::InitEquationTable()
// fill the invalid equation index
m_equationLookupTable[log2ElementBytes][tileIndex] = equationIndex;
}
+
+ if (m_chipFamily == ADDR_CHIP_FAMILY_SI)
+ {
+ // For tile index 3 which is shared between PRT depth and uncompressed depth
+ m_uncompressDepthEqIndex = m_numEquations;
+
+ for (UINT_32 log2ElemBytes = 0; log2ElemBytes < MaxNumElementBytes; log2ElemBytes++)
+ {
+ TileConfig tileConfig = m_tileTable[3];
+ ADDR_EQUATION equation;
+ ADDR_E_RETURNCODE retCode;
+
+ memset(&equation, 0, sizeof(ADDR_EQUATION));
+
+ retCode = ComputeMacroTileEquation(log2ElemBytes,
+ tileConfig.mode,
+ tileConfig.type,
+ &tileConfig.info,
+ &equation);
+
+ if (retCode == ADDR_OK)
+ {
+ UINT_32 equationIndex = m_numEquations;
+ ADDR_ASSERT(equationIndex < EquationTableSize);
+
+ m_blockSlices[equationIndex] = 1;
+
+ const ADDR_TILEINFO* pTileInfo = &tileConfig.info;
+
+ m_blockWidth[equationIndex] =
+ HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth *
+ pTileInfo->macroAspectRatio;
+ m_blockHeight[equationIndex] =
+ MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks /
+ pTileInfo->macroAspectRatio;
+
+ m_equationTable[equationIndex] = equation;
+
+ m_numEquations++;
+ }
+ }
+ }
}
}
@@ -3586,9 +3773,10 @@ VOID SiLib::InitEquationTable()
****************************************************************************************************
*/
BOOL_32 SiLib::IsEquationSupported(
- UINT_32 bpp, ///< Bits per pixel
- TileConfig tileConfig, ///< Tile config
- INT_32 tileIndex ///< Tile index
+ UINT_32 bpp, ///< Bits per pixel
+ TileConfig tileConfig, ///< Tile config
+ INT_32 tileIndex, ///< Tile index
+ UINT_32 elementBytesLog2 ///< Log2 of element bytes
) const
{
BOOL_32 supported = TRUE;
@@ -3624,24 +3812,7 @@ BOOL_32 SiLib::IsEquationSupported(
if ((supported == TRUE) && (m_chipFamily == ADDR_CHIP_FAMILY_SI))
{
- // Please refer to SiLib::HwlSetupTileInfo for PRT tile index selecting
- // Tile index 3, 6, 21-25 are for PRT single sample
- if (tileIndex == 3)
- {
- supported = (bpp == 16);
- }
- else if (tileIndex == 6)
- {
- supported = (bpp == 32);
- }
- else if ((tileIndex >= 21) && (tileIndex <= 25))
- {
- supported = (bpp == 8u * (1u << (static_cast<UINT_32>(tileIndex) - 21u)));
- }
- else
- {
- supported = FALSE;
- }
+ supported = m_EquationSupport[tileIndex][elementBytesLog2];
}
}
diff --git a/src/amd/addrlib/r800/siaddrlib.h b/src/amd/addrlib/r800/siaddrlib.h
index 6daaa2388a7..37e26ffc26e 100644
--- a/src/amd/addrlib/r800/siaddrlib.h
+++ b/src/amd/addrlib/r800/siaddrlib.h
@@ -261,7 +261,7 @@ protected:
// Check if it is supported for given bpp and tile config to generate an equation
BOOL_32 IsEquationSupported(
- UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex) const;
+ UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex, UINT_32 elementBytesLog2) const;
// Protected non-virtual functions
VOID ComputeTileCoordFromPipeAndElemIdx(
@@ -289,10 +289,19 @@ protected:
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxNumElementBytes = 5;
+
+ static const BOOL_32 m_EquationSupport[TileTableSize][MaxNumElementBytes];
+
+ // Prt tile mode index mask
+ static const UINT_32 SiPrtTileIndexMask = ((1 << 3) | (1 << 5) | (1 << 6) | (1 << 7) |
+ (1 << 21) | (1 << 22) | (1 << 23) | (1 << 24) |
+ (1 << 25) | (1 << 30));
+
// More than half slots in tile mode table can't support equation
- static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2;
+ static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2;
// Equation table
ADDR_EQUATION m_equationTable[EquationTableSize];
+ UINT_32 m_numMacroBits[EquationTableSize];
UINT_32 m_blockWidth[EquationTableSize];
UINT_32 m_blockHeight[EquationTableSize];
UINT_32 m_blockSlices[EquationTableSize];
@@ -301,6 +310,8 @@ protected:
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxNumElementBytes][TileTableSize];
+ UINT_32 m_uncompressDepthEqIndex;
+
private:
VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const;