summaryrefslogtreecommitdiffstats
path: root/src/amd/addrlib/gfx9
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd/addrlib/gfx9')
-rw-r--r--src/amd/addrlib/gfx9/coord.cpp16
-rw-r--r--src/amd/addrlib/gfx9/coord.h16
-rw-r--r--src/amd/addrlib/gfx9/gfx9addrlib.cpp1084
-rw-r--r--src/amd/addrlib/gfx9/gfx9addrlib.h86
-rw-r--r--src/amd/addrlib/gfx9/rbmap.cpp1388
-rw-r--r--src/amd/addrlib/gfx9/rbmap.h142
6 files changed, 798 insertions, 1934 deletions
diff --git a/src/amd/addrlib/gfx9/coord.cpp b/src/amd/addrlib/gfx9/coord.cpp
index effdc90017e..228d8f1872b 100644
--- a/src/amd/addrlib/gfx9/coord.cpp
+++ b/src/amd/addrlib/gfx9/coord.cpp
@@ -34,20 +34,20 @@ Coordinate::Coordinate()
ord = 0;
}
-Coordinate::Coordinate(INT_8 c, UINT_32 n)
+Coordinate::Coordinate(INT_8 c, INT_32 n)
{
- set(c,n);
+ set(c, n);
}
-VOID Coordinate::set(INT_8 c, UINT_32 n)
+VOID Coordinate::set(INT_8 c, INT_32 n)
{
dim = c;
ord = static_cast<INT_8>(n);
}
-UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m)
+UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const
{
- UINT_32 bit = 1 << (UINT_32)ord;
+ UINT_32 bit = static_cast<UINT_32>(1ull << static_cast<UINT_32>(ord));
UINT_32 out = 0;
switch (dim)
@@ -234,7 +234,7 @@ UINT_32 CoordTerm::getsize()
return num_coords;
}
-UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m)
+UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const
{
UINT_32 out = 0;
for (UINT_32 i = 0; i < num_coords; i++)
@@ -386,7 +386,7 @@ UINT_32 CoordEq::getsize()
return m_numBits;
}
-UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m)
+UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const
{
UINT_64 out = 0;
for (UINT_32 i = 0; i < m_numBits; i++)
@@ -401,7 +401,7 @@ UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m)
VOID CoordEq::solveAddr(
UINT_64 addr, UINT_32 sliceInM,
- UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m)
+ UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const
{
UINT_32 xBitsValid = 0;
UINT_32 yBitsValid = 0;
diff --git a/src/amd/addrlib/gfx9/coord.h b/src/amd/addrlib/gfx9/coord.h
index 28c57c17fe1..4243d3069a9 100644
--- a/src/amd/addrlib/gfx9/coord.h
+++ b/src/amd/addrlib/gfx9/coord.h
@@ -33,12 +33,12 @@ class Coordinate
{
public:
Coordinate();
- Coordinate(INT_8 c, UINT_32 n);
+ Coordinate(INT_8 c, INT_32 n);
- VOID set(INT_8 c, UINT_32 n);
- UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0);
- INT_8 getdim();
- INT_8 getord();
+ VOID set(INT_8 c, INT_32 n);
+ UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const;
+ INT_8 getdim();
+ INT_8 getord();
BOOL_32 operator==(const Coordinate& b);
BOOL_32 operator<(const Coordinate& b);
@@ -64,7 +64,7 @@ public:
BOOL_32 Exists(Coordinate& co);
VOID copyto(CoordTerm& cl);
UINT_32 getsize();
- UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0);
+ UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const;
VOID getsmallest(Coordinate& co);
UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0');
@@ -87,9 +87,9 @@ public:
BOOL_32 Exists(Coordinate& co);
VOID resize(UINT_32 n);
UINT_32 getsize();
- virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0);
+ virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const;
virtual VOID solveAddr(UINT_64 addr, UINT_32 sliceInM,
- UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m);
+ UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const;
VOID copy(CoordEq& o, UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF);
VOID reverse(UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF);
diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
index edb4c6e636a..e06f13c0afe 100644
--- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
@@ -34,14 +34,8 @@
#include "gfx9addrlib.h"
#include "gfx9_gb_reg.h"
-#include "gfx9_enum.h"
-#if BRAHMA_BUILD
-#include "amdgpu_id.h"
-#else
-#include "ai_id.h"
-#include "rv_id.h"
-#endif
+#include "amdgpu_asic_addr.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -183,7 +177,14 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
}
else
{
- numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ if (m_settings.applyAliasFix)
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
+ }
+ else
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ }
}
numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
@@ -222,6 +223,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
+ if (m_settings.htileAlignFix)
+ {
+ sizeAlign <<= 1;
+ }
+
pOut->pitch = numMetaBlkX * metaBlkDim.w;
pOut->height = numMetaBlkY * metaBlkDim.h;
pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
@@ -284,7 +290,14 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
}
else
{
- numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ if (m_settings.applyAliasFix)
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
+ }
+ else
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ }
numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
}
@@ -569,8 +582,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
if ((numPipeTotal > 1) || (numRbTotal > 1))
{
+ const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
+
numCompressBlkPerMetaBlk =
- Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : 1024));
+ Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
{
@@ -685,8 +700,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
{
ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
input.size = sizeof(input);
@@ -710,11 +724,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
- CoordEq metaEq;
-
- GetMetaEquation(&metaEq, 0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
- Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
+ const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
+ Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
@@ -724,7 +736,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
+ UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
pOut->addr = address >> 1;
pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
@@ -754,8 +766,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;
@@ -787,11 +798,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
- CoordEq metaEq;
-
- GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
- Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
+ const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
+ Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
@@ -801,7 +810,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
+ UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
pOut->addr = address >> 1;
@@ -830,8 +839,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;
@@ -862,11 +870,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
- CoordEq metaEq;
-
- GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
- Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
+ const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
+ Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
pIn->swizzleMode);
@@ -879,7 +885,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 x, y, z, s, m;
- metaEq.solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
+ pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
pOut->slice = m / sliceSizeInBlock;
pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
@@ -903,7 +909,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;
@@ -942,12 +948,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
- CoordEq metaEq;
-
- GetMetaEquation(&metaEq, pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
- Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
- compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2);
+ const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
+ Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
+ compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2});
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
@@ -957,7 +961,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
+ UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
pOut->addr = address >> 1;
@@ -1184,16 +1188,18 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
m_settings.isArcticIsland = 1;
m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
- if (m_settings.isVega10)
+ m_settings.isDce12 = 1;
+
+ if (m_settings.isVega10 == 0)
{
- m_settings.isDce12 = 1;
+ m_settings.htileAlignFix = 1;
+ m_settings.applyAliasFix = 1;
}
m_settings.metaBaseAlignFix = 1;
m_settings.depthPipeXorDisable = 1;
break;
-
case FAMILY_RV:
m_settings.isArcticIsland = 1;
m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision);
@@ -1205,7 +1211,10 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
m_settings.metaBaseAlignFix = 1;
- m_settings.depthPipeXorDisable = 1;
+ if (ASICREV_IS_RAVEN(uChipRevision))
+ {
+ m_settings.depthPipeXorDisable = 1;
+ }
break;
default:
@@ -1230,6 +1239,7 @@ VOID Gfx9Lib::GetRbEquation(
CoordEq* pRbEq, ///< [out] rb equation
UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
UINT_32 numSeLog2) ///< [in] number of shader engine
+ const
{
// RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
@@ -1250,6 +1260,12 @@ VOID Gfx9Lib::GetRbEquation(
(*pRbEq)[0].add(cy);
cx++;
cy++;
+
+ if (m_settings.applyAliasFix == false)
+ {
+ (*pRbEq)[0].add(cy);
+ }
+
(*pRbEq)[0].add(cy);
start++;
}
@@ -1583,7 +1599,6 @@ VOID Gfx9Lib::GetPipeEquation(
pPipeEq->xorin(xorMask);
}
}
-
/**
************************************************************************************************************************
* Gfx9Lib::GetMetaEquation
@@ -1591,29 +1606,86 @@ VOID Gfx9Lib::GetPipeEquation(
* @brief
* Get meta equation for cmask/htile/DCC
* @return
+* Pointer to a calculated meta equation
+************************************************************************************************************************
+*/
+const CoordEq* Gfx9Lib::GetMetaEquation( // returns a pointer into m_cachedMetaEq, generating the equation on a cache miss
+ const MetaEqParams& metaEqParams) // parameter set that doubles as the cache key
+{
+ UINT_32 cachedMetaEqIndex; // slot index of the matching key; ends at MaxCachedMetaEq when nothing matches
+
+ for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++) // linear scan over all cached keys
+ {
+ if (memcmp(&metaEqParams, // byte-wise comparison of the whole parameter struct against the stored key
+ &m_cachedMetaEqKey[cachedMetaEqIndex],
+ static_cast<UINT_32>(sizeof(metaEqParams))) == 0) // NOTE(review): memcmp also compares any padding bytes in MetaEqParams - confirm the struct has none
+ {
+ break;
+ }
+ }
+
+ CoordEq* pMetaEq = NULL;
+
+ if (cachedMetaEqIndex < MaxCachedMetaEq) // cache hit: reuse the stored equation
+ {
+ pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
+ }
+ else // cache miss: replace the slot at m_metaEqOverrideIndex
+ {
+ m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams; // store the new key first
+
+ pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++]; // equation slot paired with that key; index then advances
+
+ m_metaEqOverrideIndex %= MaxCachedMetaEq; // wrap so the slots are overwritten in rotation
+
+ GenMetaEquation(pMetaEq, // fill the chosen slot from the unpacked parameters
+ metaEqParams.maxMip,
+ metaEqParams.elementBytesLog2,
+ metaEqParams.numSamplesLog2,
+ metaEqParams.metaFlag,
+ metaEqParams.dataSurfaceType,
+ metaEqParams.swizzleMode,
+ metaEqParams.resourceType,
+ metaEqParams.metaBlkWidthLog2,
+ metaEqParams.metaBlkHeightLog2,
+ metaEqParams.metaBlkDepthLog2,
+ metaEqParams.compBlkWidthLog2,
+ metaEqParams.compBlkHeightLog2,
+ metaEqParams.compBlkDepthLog2);
+ }
+
+ return pMetaEq;
+}
+
+/**
+************************************************************************************************************************
+* Gfx9Lib::GenMetaEquation
+*
+* @brief
+* Generate meta equation for cmask/htile/DCC
+* @return
* N/A
************************************************************************************************************************
*/
-VOID Gfx9Lib::GetMetaEquation(
- CoordEq* pMetaEq, ///< [out] meta equation
- UINT_32 maxMip, ///< [in] max mip Id
- UINT_32 elementBytesLog2, ///< [in] data surface element bytes
- UINT_32 numSamplesLog2, ///< [in] data surface sample count
- ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
- Gfx9DataType dataSurfaceType, ///< [in] data surface type
- AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
- AddrResourceType resourceType, ///< [in] data surface resource type
- UINT_32 metaBlkWidthLog2, ///< [in] meta block width
- UINT_32 metaBlkHeightLog2, ///< [in] meta block height
- UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
- UINT_32 compBlkWidthLog2, ///< [in] compress block width
- UINT_32 compBlkHeightLog2, ///< [in] compress block height
- UINT_32 compBlkDepthLog2) ///< [in] compress block depth
+VOID Gfx9Lib::GenMetaEquation(
+ CoordEq* pMetaEq, ///< [out] meta equation
+ UINT_32 maxMip, ///< [in] max mip Id
+ UINT_32 elementBytesLog2, ///< [in] data surface element bytes
+ UINT_32 numSamplesLog2, ///< [in] data surface sample count
+ ADDR2_META_FLAGS metaFlag, ///< [in] meta flag
+ Gfx9DataType dataSurfaceType, ///< [in] data surface type
+ AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
+ AddrResourceType resourceType, ///< [in] data surface resource type
+ UINT_32 metaBlkWidthLog2, ///< [in] meta block width
+ UINT_32 metaBlkHeightLog2, ///< [in] meta block height
+ UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
+ UINT_32 compBlkWidthLog2, ///< [in] compress block width
+ UINT_32 compBlkHeightLog2, ///< [in] compress block height
+ UINT_32 compBlkDepthLog2) ///< [in] compress block depth
const
{
- UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
+ UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
- //UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
// Get the correct data address and rb equation
CoordEq dataEq;
@@ -1769,16 +1841,15 @@ VOID Gfx9Lib::GetMetaEquation(
}
}
- UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
- UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
- CoordEq origRbEquation;
+ const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
+ const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
+ const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
+ CoordEq origRbEquation;
GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
CoordEq rbEquation = origRbEquation;
- UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
-
for (UINT_32 i = 0; i < numRbTotalLog2; i++)
{
for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
@@ -1790,18 +1861,41 @@ VOID Gfx9Lib::GetMetaEquation(
}
}
+ if (m_settings.applyAliasFix)
+ {
+ co.set('z', -1);
+ }
+
// Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
for (UINT_32 i = 0; i < numRbTotalLog2; i++)
{
for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
{
- if (rbEquation[i] == pipeEquation[j])
+ BOOL_32 isRbEquationInPipeEquation = FALSE;
+
+ if (m_settings.applyAliasFix)
+ {
+ CoordTerm filteredPipeEq;
+ filteredPipeEq = pipeEquation[j];
+
+ filteredPipeEq.Filter('>', co, 0, 'z');
+
+ isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
+ }
+ else
+ {
+ isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
+ }
+
+ if (isRbEquationInPipeEquation)
{
rbEquation[i].Clear();
}
}
}
+ bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
+
// Loop through each bit of the channel, get the smallest coordinate,
// and remove it from the metaaddr, and rb_equation
for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
@@ -1827,6 +1921,7 @@ VOID Gfx9Lib::GetMetaEquation(
if (pipeEquation[i][k] != co)
{
rbEquation[j].add(pipeEquation[i][k]);
+ rbAppendedWithPipeBits[j] = true;
}
}
}
@@ -1838,7 +1933,18 @@ VOID Gfx9Lib::GetMetaEquation(
UINT_32 rbBitsLeft = 0;
for (UINT_32 i = 0; i < numRbTotalLog2; i++)
{
- if (rbEquation[i].getsize() > 0)
+ BOOL_32 isRbEqAppended = FALSE;
+
+ if (m_settings.applyAliasFix)
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
+ }
+ else
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > 0);
+ }
+
+ if (isRbEqAppended)
{
rbBitsLeft++;
rbEquation[i].getsmallest(co);
@@ -1860,6 +1966,7 @@ VOID Gfx9Lib::GetMetaEquation(
if (rbEquation[i][k] != co)
{
rbEquation[j].add(rbEquation[i][k]);
+ rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
}
}
}
@@ -1905,7 +2012,18 @@ VOID Gfx9Lib::GetMetaEquation(
// Put in remaining rb bits
for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
{
- if (rbEquation[i].getsize() > 0)
+ BOOL_32 isRbEqAppended = FALSE;
+
+ if (m_settings.applyAliasFix)
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
+ }
+ else
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > 0);
+ }
+
+ if (isRbEqAppended)
{
origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
// Mark any rb bit we add in to the rb mask
@@ -2717,7 +2835,8 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
{
BOOL_32 support = FALSE;
- //const AddrResourceType resourceType = pIn->resourceType;
+ const AddrResourceType resourceType = pIn->resourceType;
+ (void)resourceType;
const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
if (m_settings.isDce12)
@@ -3059,6 +3178,16 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
};
+ enum AddrSwSet
+ {
+ AddrSwSetZ = 1 << ADDR_SW_Z,
+ AddrSwSetS = 1 << ADDR_SW_S,
+ AddrSwSetD = 1 << ADDR_SW_D,
+ AddrSwSetR = 1 << ADDR_SW_R,
+
+ AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
+ };
+
ADDR_E_RETURNCODE returnCode = ADDR_OK;
ElemLib* pElemLib = GetElemLib();
@@ -3109,10 +3238,13 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
pOut->resourceType = pIn->resourceType;
}
- ADDR_ASSERT(bpp >= 8u);
- UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
+ if (bpp < 8)
+ {
+ ADDR_ASSERT_ALWAYS();
- if (IsTex1d(pOut->resourceType))
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else if (IsTex1d(pOut->resourceType))
{
pOut->swizzleMode = ADDR_SW_LINEAR;
pOut->validBlockSet.value = AddrBlockSetLinear;
@@ -3123,7 +3255,15 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
ADDR2_BLOCK_SET blockSet;
blockSet.value = 0;
- AddrSwType swType = ADDR_SW_S;
+ ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet;
+ addrPreferredSwSet.value = AddrSwSetS;
+ addrValidSwSet = addrPreferredSwSet;
+ clientPreferredSwSet = pIn->preferredSwSet;
+
+ if (clientPreferredSwSet.value == 0)
+ {
+ clientPreferredSwSet.value = AddrSwSetAll;
+ }
// prt Xor and non-xor will have less height align requirement for stereo surface
BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
@@ -3135,8 +3275,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
{
ADDR_ASSERT(IsTex2d(pOut->resourceType));
- blockSet.value = AddrBlockSetMacro;
- swType = ADDR_SW_Z;
+ blockSet.value = AddrBlockSetMacro;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ;
if (pIn->flags.depth && pIn->flags.texture)
{
@@ -3153,9 +3294,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
}
else if (ElemLib::IsBlockCompressed(pIn->format))
{
- // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. Not sure
- // under what circumstances "_D" would be appropriate as these formats are not
- // displayable.
+ // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.
+ // Not sure under what circumstances "_D" would be appropriate as these formats
+ // are not displayable.
blockSet.value = AddrBlockSetMacro;
// This isn't to be used as texture and caller doesn't allow macro tiled.
@@ -3164,15 +3305,19 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
{
blockSet.value |= AddrBlockSetLinear;
}
- swType = ADDR_SW_D;
+
+ addrPreferredSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
}
else if (ElemLib::IsMacroPixelPacked(pIn->format))
{
- // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. Its not
- // clear under what circumstances the D or R modes would be appropriate since
- // these formats are not displayable.
- blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
- swType = ADDR_SW_S;
+ // macro pixel packed formats (BG_RG, GB_GR) do not support the Z modes.
+ // It's not clear under what circumstances the D or R modes would be appropriate
+ // since these formats are not displayable.
+ blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
+
+ addrPreferredSwSet.value = AddrSwSetS;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR;
}
else if (IsTex3d(pOut->resourceType))
{
@@ -3181,28 +3326,38 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if (pIn->flags.prt)
{
// PRT cannot use SW_D which gives an unexpected block dimension
- swType = ADDR_SW_Z;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetS;
}
else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
{
// When depth (Z) is the maximum dimension then must use one of the SW_*_S
// or SW_*_Z modes if mipmapping is desired on a 3D surface
- swType = ADDR_SW_Z;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetS;
}
else if (pIn->flags.color)
{
- swType = ADDR_SW_D;
+ addrPreferredSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetS | AddrSwSetD;
}
else
{
- swType = ADDR_SW_Z;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetD;
+ if (bpp != 128)
+ {
+ addrValidSwSet.value |= AddrSwSetS;
+ }
}
}
else
{
- swType = ((pIn->flags.display == TRUE) ||
- (pIn->flags.overlay == TRUE) ||
- (pIn->bpp == 128)) ? ADDR_SW_D : ADDR_SW_S;
+ addrPreferredSwSet.value = ((pIn->flags.display == TRUE) ||
+ (pIn->flags.overlay == TRUE) ||
+ (pIn->bpp == 128)) ? AddrSwSetD : AddrSwSetS;
+
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR;
if (numMipLevels > 1)
{
@@ -3223,7 +3378,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if (displayResource)
{
- swType = pIn->flags.rotated ? ADDR_SW_R : ADDR_SW_D;
+ addrPreferredSwSet.value = pIn->flags.rotated ? AddrSwSetR : AddrSwSetD;
if (pIn->bpp > 64)
{
@@ -3238,17 +3393,21 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
// DCE12 does not support display surface to be _T swizzle mode
prtXor = FALSE;
+
+ addrValidSwSet.value = AddrSwSetD | AddrSwSetR;
}
else if (m_settings.isDcn1)
{
// _R is not supported by Dcn1
if (pIn->bpp == 64)
{
- swType = ADDR_SW_D;
+ addrPreferredSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetD;
}
else
{
- swType = ADDR_SW_S;
+ addrPreferredSwSet.value = AddrSwSetS;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
}
blockSet.micro = FALSE;
@@ -3262,279 +3421,325 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
}
}
- if ((numFrags > 1) &&
- (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
- {
- // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
- blockSet.macro4KB = FALSE;
- }
+ ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value);
- if (pIn->flags.prt)
- {
- blockSet.value &= AddrBlockSetMacro64KB;
- }
+ pOut->clientPreferredSwSet = clientPreferredSwSet;
+
+ // Clamp client preferred set to valid set
+ clientPreferredSwSet.value &= addrValidSwSet.value;
- // Apply customized forbidden setting
- blockSet.value &= ~pIn->forbiddenBlock.value;
+ pOut->validSwTypeSet = addrValidSwSet;
- if (pIn->maxAlign > 0)
+ if (clientPreferredSwSet.value == 0)
{
- if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
+ // Client asks for an invalid swizzle type...
+ ADDR_ASSERT_ALWAYS();
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else
+ {
+ if (IsPow2(clientPreferredSwSet.value))
+ {
+ // Only one swizzle type left, use it directly
+ addrPreferredSwSet.value = clientPreferredSwSet.value;
+ }
+ else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0)
{
- blockSet.macro64KB = FALSE;
+ // Client wants 2 or more valid swizzle types but none of them is addrlib preferred
+ if (clientPreferredSwSet.sw_D)
+ {
+ addrPreferredSwSet.value = AddrSwSetD;
+ }
+ else if (clientPreferredSwSet.sw_Z)
+ {
+ addrPreferredSwSet.value = AddrSwSetZ;
+ }
+ else if (clientPreferredSwSet.sw_R)
+ {
+ addrPreferredSwSet.value = AddrSwSetR;
+ }
+ else
+ {
+ ADDR_ASSERT(clientPreferredSwSet.sw_S);
+ addrPreferredSwSet.value = AddrSwSetS;
+ }
}
- if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
+ if ((numFrags > 1) &&
+ (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
{
+ // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
blockSet.macro4KB = FALSE;
}
- if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
+ if (pIn->flags.prt)
{
- blockSet.micro = FALSE;
+ blockSet.value &= AddrBlockSetMacro64KB;
}
- }
- Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
- Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
- UINT_64 padSize[AddrBlockMaxTiledType] = {0};
+ // Apply customized forbidden setting
+ blockSet.value &= ~pIn->forbiddenBlock.value;
- if (blockSet.micro)
- {
- returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
- &blkAlign[AddrBlockMicro].h,
- &blkAlign[AddrBlockMicro].d,
- bpp,
- numFrags,
- pOut->resourceType,
- ADDR_SW_256B);
-
- if (returnCode == ADDR_OK)
+ if (pIn->maxAlign > 0)
{
- if (displayResource)
+ if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
{
- blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
+ blockSet.macro64KB = FALSE;
}
- else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
- (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
+
+ if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
{
- // If one 256B block can contain the surface, don't bother bigger block type
blockSet.macro4KB = FALSE;
- blockSet.macro64KB = FALSE;
- blockSet.var = FALSE;
}
- padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
- slice, &paddedDim[AddrBlockMicro]);
- }
- }
-
- if ((returnCode == ADDR_OK) && blockSet.macro4KB)
- {
- returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
- &blkAlign[AddrBlock4KB].h,
- &blkAlign[AddrBlock4KB].d,
- bpp,
- numFrags,
- pOut->resourceType,
- ADDR_SW_4KB);
-
- if (returnCode == ADDR_OK)
- {
- if (displayResource)
+ if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
{
- blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
+ blockSet.micro = FALSE;
}
-
- padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
- slice, &paddedDim[AddrBlock4KB]);
-
- ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
}
- }
- if ((returnCode == ADDR_OK) && blockSet.macro64KB)
- {
- returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
- &blkAlign[AddrBlock64KB].h,
- &blkAlign[AddrBlock64KB].d,
- bpp,
- numFrags,
- pOut->resourceType,
- ADDR_SW_64KB);
+ Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
+ Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
+ UINT_64 padSize[AddrBlockMaxTiledType] = {0};
- if (returnCode == ADDR_OK)
+ if (blockSet.micro)
{
- if (displayResource)
+ returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
+ &blkAlign[AddrBlockMicro].h,
+ &blkAlign[AddrBlockMicro].d,
+ bpp,
+ numFrags,
+ pOut->resourceType,
+ ADDR_SW_256B);
+
+ if (returnCode == ADDR_OK)
{
- blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
- }
-
- padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
- slice, &paddedDim[AddrBlock64KB]);
-
- ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
- ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
- }
- }
+ if (displayResource)
+ {
+ blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
+ }
+ else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
+ (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
+ {
+ // If one 256B block can contain the surface, don't bother bigger block type
+ blockSet.macro4KB = FALSE;
+ blockSet.macro64KB = FALSE;
+ blockSet.var = FALSE;
+ }
- if (returnCode == ADDR_OK)
- {
- for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
- {
- padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
+ padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
+ slice, &paddedDim[AddrBlockMicro]);
+ }
}
- // Use minimum block type which meets all conditions above if flag minimizeAlign was set
- if (pIn->flags.minimizeAlign)
+ if ((returnCode == ADDR_OK) && blockSet.macro4KB)
{
- // If padded size of 64KB block is larger than padded size of 256B block or 4KB
- // block, filter out 64KB block from candidate list
- if (blockSet.macro64KB &&
- ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
- (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
+ returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
+ &blkAlign[AddrBlock4KB].h,
+ &blkAlign[AddrBlock4KB].d,
+ bpp,
+ numFrags,
+ pOut->resourceType,
+ ADDR_SW_4KB);
+
+ if (returnCode == ADDR_OK)
{
- blockSet.macro64KB = FALSE;
- }
+ if (displayResource)
+ {
+ blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
+ }
- // If padded size of 4KB block is larger than padded size of 256B block,
- // filter out 4KB block from candidate list
- if (blockSet.macro4KB &&
- blockSet.micro &&
- (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
- {
- blockSet.macro4KB = FALSE;
+ padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
+ slice, &paddedDim[AddrBlock4KB]);
+
+ ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
}
}
- // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
- else if (pIn->flags.opt4space)
- {
- UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
- (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
- threshold += threshold >> 1;
-
- if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
+ if ((returnCode == ADDR_OK) && blockSet.macro64KB)
+ {
+ returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
+ &blkAlign[AddrBlock64KB].h,
+ &blkAlign[AddrBlock64KB].d,
+ bpp,
+ numFrags,
+ pOut->resourceType,
+ ADDR_SW_64KB);
+
+ if (returnCode == ADDR_OK)
{
- blockSet.macro64KB = FALSE;
- }
+ if (displayResource)
+ {
+ blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
+ }
- if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
- {
- blockSet.macro4KB = FALSE;
+ padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
+ slice, &paddedDim[AddrBlock64KB]);
+
+ ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
+ ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
}
}
- else
+
+ if (returnCode == ADDR_OK)
{
- if (blockSet.macro64KB &&
- (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
- ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
+ UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
+
+ for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
{
- // If 64KB block waste more than half memory on padding, filter it out from
- // candidate list when it is not the only choice left
- blockSet.macro64KB = FALSE;
+ padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
}
- }
- if (blockSet.value == 0)
- {
- // Bad things happen, client will not get any useful information from AddrLib.
- // Maybe we should fill in some output earlier instead of outputing nothing?
- ADDR_ASSERT_ALWAYS();
- returnCode = ADDR_INVALIDPARAMS;
- }
- else
- {
- pOut->validBlockSet = blockSet;
- pOut->canXor = pOut->canXor &&
- (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
-
- if (blockSet.macro64KB || blockSet.macro4KB)
+ // Use minimum block type which meets all conditions above if flag minimizeAlign was set
+ if (pIn->flags.minimizeAlign)
{
- if (swType == ADDR_SW_Z)
- {
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
- }
- else if (swType == ADDR_SW_S)
+ // If padded size of 64KB block is larger than padded size of 256B block or 4KB
+ // block, filter out 64KB block from candidate list
+ if (blockSet.macro64KB &&
+ ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
+ (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
{
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
+ blockSet.macro64KB = FALSE;
}
- else if (swType == ADDR_SW_D)
+
+ // If padded size of 4KB block is larger than padded size of 256B block,
+ // filter out 4KB block from candidate list
+ if (blockSet.macro4KB &&
+ blockSet.micro &&
+ (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
{
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
+ blockSet.macro4KB = FALSE;
}
- else
+ }
+ // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
+ else if (pIn->flags.opt4space)
+ {
+ UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
+ (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
+
+ threshold += threshold >> 1;
+
+ if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
{
- ADDR_ASSERT(swType == ADDR_SW_R);
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
+ blockSet.macro64KB = FALSE;
}
- if (prtXor && blockSet.macro64KB)
+ if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
{
- // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
- const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
- pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
+ blockSet.macro4KB = FALSE;
}
- else if (pOut->canXor)
+ }
+ else
+ {
+ if (blockSet.macro64KB &&
+ (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
+ ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
{
- // Client wants XOR and this is allowed, return XOR version swizzle mode
- const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
- pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
+ // If 64KB block wastes more than half memory on padding, filter it out from
+ // candidate list when it is not the only choice left
+ blockSet.macro64KB = FALSE;
}
}
- else if (blockSet.micro)
+
+ if (blockSet.value == 0)
+ {
+ // Bad things happen, client will not get any useful information from AddrLib.
+ // Maybe we should fill in some output earlier instead of outputting nothing?
+ ADDR_ASSERT_ALWAYS();
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else
{
- if (swType == ADDR_SW_S)
+ pOut->validBlockSet = blockSet;
+ pOut->canXor = pOut->canXor &&
+ (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
+
+ if (blockSet.macro64KB || blockSet.macro4KB)
+ {
+ if (addrPreferredSwSet.value == AddrSwSetZ)
+ {
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
+ }
+ else if (addrPreferredSwSet.value == AddrSwSetS)
+ {
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
+ }
+ else if (addrPreferredSwSet.value == AddrSwSetD)
+ {
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
+ }
+ else
+ {
+ ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
+ }
+
+ if (prtXor && blockSet.macro64KB)
+ {
+ // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
+ const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
+ pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
+ }
+ else if (pOut->canXor)
+ {
+ // Client wants XOR and this is allowed, return XOR version swizzle mode
+ const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
+ pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
+ }
+ }
+ else if (blockSet.micro)
{
- pOut->swizzleMode = ADDR_SW_256B_S;
+ if (addrPreferredSwSet.value == AddrSwSetS)
+ {
+ pOut->swizzleMode = ADDR_SW_256B_S;
+ }
+ else if (addrPreferredSwSet.value == AddrSwSetD)
+ {
+ pOut->swizzleMode = ADDR_SW_256B_D;
+ }
+ else
+ {
+ ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
+ pOut->swizzleMode = ADDR_SW_256B_R;
+ }
}
- else if (swType == ADDR_SW_D)
+ else if (blockSet.linear)
{
- pOut->swizzleMode = ADDR_SW_256B_D;
+ // Falling into this branch doesn't mean linear is suitable, only that there are no other choices!
+ pOut->swizzleMode = ADDR_SW_LINEAR;
}
else
{
- ADDR_ASSERT(swType == ADDR_SW_R);
- pOut->swizzleMode = ADDR_SW_256B_R;
- }
- }
- else if (blockSet.linear)
- {
- // Fall into this branch doesn't mean linear is suitable, only no other choices!
- pOut->swizzleMode = ADDR_SW_LINEAR;
- }
- else
- {
- ADDR_ASSERT(blockSet.var);
+ ADDR_ASSERT(blockSet.var);
- // Designer consider VAR swizzle mode is usless for most cases
- ADDR_UNHANDLED_CASE();
+ // Designers consider VAR swizzle mode useless for most cases
+ ADDR_UNHANDLED_CASE();
- returnCode = ADDR_NOTSUPPORTED;
- }
+ returnCode = ADDR_NOTSUPPORTED;
+ }
#if DEBUG
- // Post sanity check, at least AddrLib should accept the output generated by its own
- if (pOut->swizzleMode != ADDR_SW_LINEAR)
- {
- ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
- localIn.flags = pIn->flags;
- localIn.swizzleMode = pOut->swizzleMode;
- localIn.resourceType = pOut->resourceType;
- localIn.format = pIn->format;
- localIn.bpp = bpp;
- localIn.width = width;
- localIn.height = height;
- localIn.numSlices = slice;
- localIn.numMipLevels = numMipLevels;
- localIn.numSamples = numSamples;
- localIn.numFrags = numFrags;
-
- HwlComputeSurfaceInfoSanityCheck(&localIn);
-
- // TODO : check all valid block type available in validBlockSet?
- }
+ // Post sanity check, at least AddrLib should accept the output generated by its own
+ if (pOut->swizzleMode != ADDR_SW_LINEAR)
+ {
+ ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
+ localIn.flags = pIn->flags;
+ localIn.swizzleMode = pOut->swizzleMode;
+ localIn.resourceType = pOut->resourceType;
+ localIn.format = pIn->format;
+ localIn.bpp = bpp;
+ localIn.width = width;
+ localIn.height = height;
+ localIn.numSlices = slice;
+ localIn.numMipLevels = numMipLevels;
+ localIn.numSamples = numSamples;
+ localIn.numFrags = numFrags;
+
+ HwlComputeSurfaceInfoSanityCheck(&localIn);
+
+ }
#endif
+ }
}
}
}
@@ -3709,53 +3914,48 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
- pOut->epitchIsHeight = FALSE;
- pOut->mipChainInTail = FALSE;
+ pOut->epitchIsHeight = FALSE;
+ pOut->mipChainInTail = FALSE;
+ pOut->firstMipIdInTail = pIn->numMipLevels;
- pOut->mipChainPitch = pOut->pitch;
- pOut->mipChainHeight = pOut->height;
- pOut->mipChainSlice = pOut->numSlices;
+ pOut->mipChainPitch = pOut->pitch;
+ pOut->mipChainHeight = pOut->height;
+ pOut->mipChainSlice = pOut->numSlices;
if (pIn->numMipLevels > 1)
{
- UINT_32 numMipLevel;
- ADDR2_MIP_INFO *pMipInfo;
- ADDR2_MIP_INFO mipInfo[4];
-
- if (pOut->pMipInfo != NULL)
+ pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
+ pIn->swizzleMode,
+ pIn->bpp,
+ pIn->width,
+ pIn->height,
+ pIn->numSlices,
+ pOut->blockWidth,
+ pOut->blockHeight,
+ pOut->blockSlices,
+ pIn->numMipLevels,
+ pOut->pMipInfo);
+
+ const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
+
+ if (endingMipId == 0)
{
- pMipInfo = pOut->pMipInfo;
- numMipLevel = pIn->numMipLevels;
- }
- else
- {
- pMipInfo = mipInfo;
- numMipLevel = Min(pIn->numMipLevels, 4u);
- }
+ const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
+ pIn->swizzleMode,
+ pOut->blockWidth,
+ pOut->blockHeight,
+ pOut->blockSlices);
- UINT_32 endingMip = GetMipChainInfo(pIn->resourceType,
- pIn->swizzleMode,
- pIn->bpp,
- pIn->width,
- pIn->height,
- pIn->numSlices,
- pOut->blockWidth,
- pOut->blockHeight,
- pOut->blockSlices,
- numMipLevel,
- pMipInfo);
-
- if (endingMip == 0)
- {
pOut->epitchIsHeight = TRUE;
- pOut->pitch = pMipInfo[0].pitch;
- pOut->height = pMipInfo[0].height;
- pOut->numSlices = pMipInfo[0].depth;
+ pOut->pitch = tailMaxDim.w;
+ pOut->height = tailMaxDim.h;
+ pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
+ tailMaxDim.d : pIn->numSlices;
pOut->mipChainInTail = TRUE;
}
else
{
- UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
+ UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
@@ -3767,7 +3967,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
{
UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
- if ((mip1WidthInBlk == 1) && (endingMip > 2))
+ if ((mip1WidthInBlk == 1) && (endingMipId > 2))
{
mip1WidthInBlk++;
}
@@ -3780,7 +3980,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
{
UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
- if ((mip1HeightInBlk == 1) && (endingMip > 2))
+ if ((mip1HeightInBlk == 1) && (endingMipId > 2))
{
mip1HeightInBlk++;
}
@@ -3821,22 +4021,22 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
UINT_64 macroBlockOffset =
blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
- pMipInfo[i].macroBlockOffset = macroBlockOffset;
- pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
+ pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
+ pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
}
}
}
else if (pOut->pMipInfo != NULL)
{
- pOut->pMipInfo[0].pitch = pOut->pitch;
+ pOut->pMipInfo[0].pitch = pOut->pitch;
pOut->pMipInfo[0].height = pOut->height;
- pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
+ pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
pOut->pMipInfo[0].offset = 0;
}
pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
(pIn->bpp >> 3) * pIn->numFrags;
- pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
+ pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
if (pIn->flags.prt)
@@ -3851,6 +4051,95 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
/**
************************************************************************************************************************
+* Gfx9Lib::HwlComputeSurfaceInfoLinear
+*
+* @brief
+* Internal function to calculate alignment for linear surface
+*
+* @return
+* ADDR_E_RETURNCODE
+************************************************************************************************************************
+*/
+ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
+ ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+ UINT_32 pitch = 0;
+ UINT_32 actualHeight = 0;
+ UINT_32 elementBytes = pIn->bpp >> 3;
+ const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
+
+ if (IsTex1d(pIn->resourceType))
+ {
+ if (pIn->height > 1)
+ {
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else
+ {
+ const UINT_32 pitchAlignInElement = alignment / elementBytes;
+
+ pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
+ actualHeight = pIn->numMipLevels;
+
+ if (pIn->flags.prt == FALSE)
+ {
+ returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
+ &pitch, &actualHeight);
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ if (pOut->pMipInfo != NULL)
+ {
+ for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
+ {
+ pOut->pMipInfo[i].offset = pitch * elementBytes * i;
+ pOut->pMipInfo[i].pitch = pitch;
+ pOut->pMipInfo[i].height = 1;
+ pOut->pMipInfo[i].depth = 1;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
+ }
+
+ if ((pitch == 0) || (actualHeight == 0))
+ {
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ pOut->pitch = pitch;
+ pOut->height = pIn->height;
+ pOut->numSlices = pIn->numSlices;
+ pOut->mipChainPitch = pitch;
+ pOut->mipChainHeight = actualHeight;
+ pOut->mipChainSlice = pOut->numSlices;
+ pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
+ pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
+ pOut->surfSize = pOut->sliceSize * pOut->numSlices;
+ pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
+ pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
+ pOut->blockHeight = 1;
+ pOut->blockSlices = 1;
+ }
+
+ // Post calculation validate
+ ADDR_ASSERT(pOut->sliceSize > 0);
+
+ return returnCode;
+}
+
+/**
+************************************************************************************************************************
* Gfx9Lib::GetMipChainInfo
*
* @brief
@@ -3876,16 +4165,15 @@ UINT_32 Gfx9Lib::GetMipChainInfo(
const Dim3d tailMaxDim =
GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
- UINT_32 mipPitch = mip0Width;
- UINT_32 mipHeight = mip0Height;
- UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
- UINT_32 offset = 0;
- UINT_32 endingMip = numMipLevel - 1;
- BOOL_32 inTail = FALSE;
- BOOL_32 finalDim = FALSE;
-
- BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
- BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
+ UINT_32 mipPitch = mip0Width;
+ UINT_32 mipHeight = mip0Height;
+ UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
+ UINT_32 offset = 0;
+ UINT_32 firstMipIdInTail = numMipLevel;
+ BOOL_32 inTail = FALSE;
+ BOOL_32 finalDim = FALSE;
+ BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
+ BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
{
@@ -3931,10 +4219,9 @@ UINT_32 Gfx9Lib::GetMipChainInfo(
if (inTail)
{
- endingMip = mipId;
-
- mipPitch = tailMaxDim.w;
- mipHeight = tailMaxDim.h;
+ firstMipIdInTail = mipId;
+ mipPitch = tailMaxDim.w;
+ mipHeight = tailMaxDim.h;
if (is3dThick)
{
@@ -3953,10 +4240,14 @@ UINT_32 Gfx9Lib::GetMipChainInfo(
}
}
- pMipInfo[mipId].pitch = mipPitch;
- pMipInfo[mipId].height = mipHeight;
- pMipInfo[mipId].depth = mipDepth;
- pMipInfo[mipId].offset = offset;
+ if (pMipInfo != NULL)
+ {
+ pMipInfo[mipId].pitch = mipPitch;
+ pMipInfo[mipId].height = mipHeight;
+ pMipInfo[mipId].depth = mipDepth;
+ pMipInfo[mipId].offset = offset;
+ }
+
offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
if (finalDim)
@@ -3978,7 +4269,7 @@ UINT_32 Gfx9Lib::GetMipChainInfo(
}
}
- return endingMip;
+ return firstMipIdInTail;
}
/**
@@ -3999,7 +4290,7 @@ VOID Gfx9Lib::GetMetaMiptailInfo(
Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
) const
{
- BOOL_32 isThick = (pMetaBlkDim->d > 1);
+ BOOL_32 isThick = (pMetaBlkDim->d > 1);
UINT_32 mipWidth = pMetaBlkDim->w;
UINT_32 mipHeight = pMetaBlkDim->h >> 1;
UINT_32 mipDepth = pMetaBlkDim->d;
@@ -4557,5 +4848,72 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
return returnCode;
}
+/**
+************************************************************************************************************************
+* Gfx9Lib::ComputeSurfaceLinearPadding
+*
+* @brief
+* Internal function to calculate padding for linear swizzle 2D/3D surface
+*
+* @return
+* ADDR_E_RETURNCODE
+************************************************************************************************************************
+*/
+ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
+ UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
+ UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
+ ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
+ ) const
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ UINT_32 elementBytes = pIn->bpp >> 3;
+ UINT_32 pitchAlignInElement = 0;
+
+ if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
+ {
+ ADDR_ASSERT(pIn->numMipLevels <= 1);
+ ADDR_ASSERT(pIn->numSlices <= 1);
+ pitchAlignInElement = 1;
+ }
+ else
+ {
+ pitchAlignInElement = (256 / elementBytes);
+ }
+
+ UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
+ UINT_32 slice0PaddedHeight = pIn->height;
+
+ returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
+ &mipChainWidth, &slice0PaddedHeight);
+
+ if (returnCode == ADDR_OK)
+ {
+ UINT_32 mipChainHeight = 0;
+ UINT_32 mipHeight = pIn->height;
+
+ for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
+ {
+ if (pMipInfo != NULL)
+ {
+ pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
+ pMipInfo[i].pitch = mipChainWidth;
+ pMipInfo[i].height = mipHeight;
+ pMipInfo[i].depth = 1;
+ }
+
+ mipChainHeight += mipHeight;
+ mipHeight = RoundHalf(mipHeight);
+ mipHeight = Max(mipHeight, 1u);
+ }
+
+ *pMipmap0PaddedWidth = mipChainWidth;
+ *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
+ }
+
+ return returnCode;
+}
+
} // V2
} // Addr
diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.h b/src/amd/addrlib/gfx9/gfx9addrlib.h
index 418ccac5142..1f233a4ff91 100644
--- a/src/amd/addrlib/gfx9/gfx9addrlib.h
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.h
@@ -65,7 +65,9 @@ struct Gfx9ChipSettings
// Misc configuration bits
UINT_32 metaBaseAlignFix : 1;
UINT_32 depthPipeXorDisable : 1;
- UINT_32 reserved2 : 30;
+ UINT_32 htileAlignFix : 1;
+ UINT_32 applyAliasFix : 1;
+ UINT_32 reserved2 : 28;
};
};
@@ -83,6 +85,28 @@ enum Gfx9DataType
/**
************************************************************************************************************************
+* @brief GFX9 meta equation parameters
+************************************************************************************************************************
+*/
+struct MetaEqParams
+{
+ UINT_32 maxMip;
+ UINT_32 elementBytesLog2;
+ UINT_32 numSamplesLog2;
+ ADDR2_META_FLAGS metaFlag;
+ Gfx9DataType dataSurfaceType;
+ AddrSwizzleMode swizzleMode;
+ AddrResourceType resourceType;
+ UINT_32 metaBlkWidthLog2;
+ UINT_32 metaBlkHeightLog2;
+ UINT_32 metaBlkDepthLog2;
+ UINT_32 compBlkWidthLog2;
+ UINT_32 compBlkHeightLog2;
+ UINT_32 compBlkDepthLog2;
+};
+
+/**
+************************************************************************************************************************
* @brief This class is the GFX9 specific address library
* function set.
************************************************************************************************************************
@@ -139,31 +163,31 @@ protected:
virtual ADDR_E_RETURNCODE HwlComputeHtileInfo(
const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo(
const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,
- ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
- ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
- const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
- const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr(
- const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
+ ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord(
- const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut);
virtual UINT_32 HwlGetEquationIndex(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
@@ -206,17 +230,7 @@ protected:
if (IsXor(swizzleMode))
{
- if (m_settings.isVega10 || m_settings.isRaven)
- {
- baseAlign = GetBlockSize(swizzleMode);
- }
- else
- {
- UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
- UINT_32 pipeBits = GetPipeXorBits(blockSizeLog2);
- UINT_32 bankBits = GetBankXorBits(blockSizeLog2);
- baseAlign = 1 << (Min(blockSizeLog2, m_pipeInterleaveLog2 + pipeBits+ bankBits));
- }
+ baseAlign = GetBlockSize(swizzleMode);
}
else
{
@@ -249,6 +263,10 @@ protected:
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
@@ -352,6 +370,10 @@ protected:
return compressBlkDim;
}
+
+ static const UINT_32 MaxSeLog2 = 3;
+ static const UINT_32 MaxRbPerSeLog2 = 2;
+
static const Dim3d Block256_3dS[MaxNumOfBpp];
static const Dim3d Block256_3dZ[MaxNumOfBpp];
@@ -375,6 +397,8 @@ protected:
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2];
+ static const UINT_32 MaxCachedMetaEq = 2;
+
private:
virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(
ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const;
@@ -382,7 +406,7 @@ private:
virtual BOOL_32 HwlInitGlobalParams(
const ADDR_CREATE_INPUT* pCreateIn);
- static VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2);
+ VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const;
VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
@@ -393,7 +417,7 @@ private:
UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const;
- VOID GetMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip,
+ VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2,
ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
@@ -401,6 +425,8 @@ private:
UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2,
UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const;
+ const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams);
+
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim,
@@ -408,7 +434,17 @@ private:
UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth,
UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const;
+ ADDR_E_RETURNCODE ComputeSurfaceLinearPadding(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ UINT_32* pMipmap0PaddedWidth,
+ UINT_32* pSlice0PaddedHeight,
+ ADDR2_MIP_INFO* pMipInfo = NULL) const;
+
Gfx9ChipSettings m_settings;
+
+ CoordEq m_cachedMetaEq[MaxCachedMetaEq];
+ MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq];
+ UINT_32 m_metaEqOverrideIndex;
};
} // V2
diff --git a/src/amd/addrlib/gfx9/rbmap.cpp b/src/amd/addrlib/gfx9/rbmap.cpp
deleted file mode 100644
index 789140d7c0e..00000000000
--- a/src/amd/addrlib/gfx9/rbmap.cpp
+++ /dev/null
@@ -1,1388 +0,0 @@
-/*
- * Copyright © 2017 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
- * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- */
-
-// This class generates rb id map based rb id equations
-
-//#define DPI_DEBUG 1
-// Unlock more verbose debug messages (V* borrows from dj -v * to indicate most verbosity)
-//#define DPI_DEBUG_V4 1
-//#define DPI_DEBUG_V5 1
-//#define DPI_DEBUG_PIPE_CASES 1
-// "----+----|----+----|----+----|----+----|"
-#include "addrcommon.h"
-#include "rbmap.h"
-
-RB_MAP::RB_MAP(void)
-{
- Initialize();
-}
-
-VOID RB_MAP::Get_Comp_Block_Screen_Space( CoordEq& addr, int bytes_log2, int* w, int* h, int* d)
-{
- int n, i;
- if( w ) *w = 0;
- if( h ) *h = 0;
- if( d ) *d = 0;
- for( n=0; n<bytes_log2; n++ ) { // go up to the bytes_log2 bit
- for( i=0; (unsigned)i<addr[n].getsize(); i++ ) {
- char dim = addr[n][i].getdim();
- int ord = addr[n][i].getord();
- if( w && dim == 'x' && ord >= *w ) *w = ord+1;
- if( h && dim == 'y' && ord >= *h ) *h = ord+1;
- if( d && dim == 'z' && ord >= *d ) *d = ord+1;
- }
- }
-}
-
-void
-RB_MAP::Get_Meta_Block_Screen_Space( int num_comp_blocks_log2, bool is_thick, bool y_biased,
- int comp_block_width_log2, int comp_block_height_log2, int comp_block_depth_log2,
-
- // Outputs
- int& meta_block_width_log2, int& meta_block_height_log2, int& meta_block_depth_log2 )
-{
- meta_block_width_log2 = comp_block_width_log2;
- meta_block_height_log2 = comp_block_height_log2;
- meta_block_depth_log2 = comp_block_depth_log2;
- int n;
-
- for( n=0; n<num_comp_blocks_log2; n++ ) {
- if( (meta_block_height_log2 < meta_block_width_log2) ||
- (y_biased && (meta_block_height_log2 == meta_block_width_log2)) ) {
- if ( !is_thick || (meta_block_height_log2 <= meta_block_depth_log2) )
- meta_block_height_log2++;
- else
- meta_block_depth_log2++;
- }
- else {
- if ( !is_thick || (meta_block_width_log2 <= meta_block_depth_log2) )
- meta_block_width_log2++;
- else
- meta_block_depth_log2++;
- }
- }
-}
-
-void
-RB_MAP::cap_pipe( int xmode, bool is_thick, int& num_ses_log2, int bpp_log2, int num_samples_log2, int pipe_interleave_log2, int& block_size_log2, int& num_pipes_log2 )
-{
- // pipes+SEs can't exceed 32 for now
- if( num_pipes_log2+num_ses_log2 > 5 ) {
- num_pipes_log2 = 5-num_ses_log2;
- }
-
- // Since we are not supporting SE affinity anymore, just add nu_ses to num_pipes, and set num_ses to 0
- num_pipes_log2 += num_ses_log2;
- num_ses_log2 = 0;
-
- // If block size is set to variable (0), compute the size
- if( block_size_log2 == 0 ) {
- //
- //TODO Temporary disable till RTL can drive Var signals properly
- }
-
- if( xmode != NONE ) {
- int max_pipes_log2 = block_size_log2 - pipe_interleave_log2;
- if( is_thick ) {
- // For 3d, treat the num_pipes as the sum of num_pipes and gpus
- num_pipes_log2 = num_pipes_log2 + num_ses_log2;
- num_ses_log2 = 0;
- } else {
- int block_space_used = num_pipes_log2+pipe_interleave_log2;
- if( block_space_used < 10+bpp_log2 ) block_space_used = 10+bpp_log2;
- // if the num gpus exceeds however many bits we have left between block size and block_space_used+num_samples
- // then set num_ses_log2 to 0
- if( num_ses_log2 > block_size_log2 - block_space_used - num_samples_log2) {
- num_pipes_log2 = num_pipes_log2 + num_ses_log2;
- num_ses_log2 = 0;
- }
- }
- if( num_pipes_log2 > max_pipes_log2 ) {
- // If it exceeds the space we have left, cap it to that
- num_pipes_log2 = max_pipes_log2;
- }
- } else {
- num_pipes_log2 = num_pipes_log2 + num_ses_log2;
- num_ses_log2 = 0;
- }
-}
-
-void RB_MAP::Get_Data_Offset_Equation( CoordEq& data_eq, int data_type, int bpp_log2, int num_samples_log2, int block_size_log2 )
-{
- bool is_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
- bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
- bool is_color = ( data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- bool is_s = ( data_type == DATA_COLOR3D_S );
- Coordinate cx( 'x', 0 );
- Coordinate cy( 'y', 0 );
- Coordinate cz( 'z', 0 );
- Coordinate cs( 's', 0 );
- // Clear the equation
- data_eq.resize(0);
- data_eq.resize(27);
- if( block_size_log2 == 0 ) block_size_log2 = 16;
-
- if( is_linear ) {
- Coordinate cm( 'm', 0 );
- int i;
- data_eq.resize(49);
- for( i=0; i<49; i++ ) {
- data_eq[i].add(cm);
- cm++;
- }
- } else if( is_thick ) {
- // Color 3d (_S and _Z modes; _D is same as color 2d)
- int i;
- if( is_s ) {
- // Standard 3d swizzle
- // Fill in bottom x bits
- for( i=bpp_log2; i<4; i++ ) {
- data_eq[i].add(cx);
- cx++;
- }
- // Fill in 2 bits of y and then z
- for( i=4; i<6; i++ ) {
- data_eq[i].add(cy);
- cy++;
- }
- for( i=6; i<8; i++ ) {
- data_eq[i].add(cz);
- cz++;
- }
- if (bpp_log2 < 2) {
- // fill in z & y bit
- data_eq[8].add(cz);
- data_eq[9].add(cy);
- cz++;
- cy++;
- } else if( bpp_log2 == 2 ) {
- // fill in y and x bit
- data_eq[8].add(cy);
- data_eq[9].add(cx);
- cy++;
- cx++;
- } else {
- // fill in 2 x bits
- data_eq[8].add(cx);
- cx++;
- data_eq[9].add(cx);
- cx++;
- }
- } else {
- // Z 3d swizzle
- int m2d_end = (bpp_log2==0) ? 3 : ((bpp_log2 < 4) ? 4 : 5);
- int num_zs = (bpp_log2==0 || bpp_log2==4) ? 2 : ((bpp_log2==1) ? 3 : 1);
- data_eq.mort2d( cx, cy, bpp_log2, m2d_end );
- for( i=m2d_end+1; i<=m2d_end+num_zs; i++ ) {
- data_eq[i].add(cz);
- cz++;
- }
- if( bpp_log2 == 0 || bpp_log2 == 3 ) {
- // add an x and z
- data_eq[6].add(cx);
- data_eq[7].add(cz);
- cx++;
- cz++;
- } else if( bpp_log2 == 2 ) {
- // add a y and z
- data_eq[6].add(cy);
- data_eq[7].add(cz);
- cy++;
- cz++;
- }
- // add y and x
- data_eq[8].add(cy);
- data_eq[9].add(cx);
- cy++;
- cx++;
- }
- // Fill in bit 10 and up
- data_eq.mort3d( cz, cy, cx, 10 );
- } else if( is_color ) {
- // Color 2D
- int micro_y_bits = (8-bpp_log2) / 2;
- int tile_split_start = block_size_log2 - num_samples_log2;
- int i;
- // Fill in bottom x bits
- for( i=bpp_log2;i<4; i++ ) {
- data_eq[i].add(cx);
- cx++;
- }
- // Fill in bottom y bits
- for( i=4; i<4+micro_y_bits; i++ ) {
- data_eq[i].add(cy);
- cy++;
- }
- // Fill in last of the micro_x bits
- for( i=4+micro_y_bits; i<8; i++ ) {
- data_eq[i].add(cx);
- cx++;
- }
- // Fill in x/y bits below sample split
- data_eq.mort2d( cy, cx, 8, tile_split_start-1 );
- // Fill in sample bits
- for( i=0; i<num_samples_log2; i++ ) {
- cs.set( 's', i );
- data_eq[tile_split_start+i].add(cs);
- }
- // Fill in x/y bits above sample split
- if( (num_samples_log2 & 1) ^ (block_size_log2 & 1) ) data_eq.mort2d( cx, cy, block_size_log2 );
- else data_eq.mort2d( cy, cx, block_size_log2 );
- } else {
- // Z, stencil or fmask
- // First, figure out where each section of bits starts
- int sample_start = bpp_log2;
- int pixel_start = bpp_log2 + num_samples_log2;
- int y_maj_start = 6 + num_samples_log2;
-
- // Put in sample bits
- int s;
- for( s=0; s<num_samples_log2; s++ ) {
- cs.set( 's', s );
- data_eq[sample_start+s].add(cs);
- }
- // Put in the x-major order pixel bits
- data_eq.mort2d( cx, cy, pixel_start, y_maj_start-1 );
- // Put in the y-major order pixel bits
- data_eq.mort2d( cy, cx, y_maj_start );
- }
-}
-
-void RB_MAP::Get_RB_Equation( CoordEq& rb_equation, int num_ses_log2, int num_rbs_log2 )
-{
- // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
- int rb_region = (num_rbs_log2 == 0) ? 5 : 4;
- Coordinate cx( 'x', rb_region );
- Coordinate cy( 'y', rb_region );
- int i, start = 0, num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;
- // Clear the rb equation
- rb_equation.resize(0);
- rb_equation.resize(num_total_rbs_log2);
- if( num_ses_log2 > 0 && num_rbs_log2 == 1 ) {
- // Special case when more than 1 SE, and only 1 RB per SE
- rb_equation[0].add(cx);
- rb_equation[0].add(cy);
- cx++;
- cy++;
- rb_equation[0].add(cy);
- start++;
- }
- for( i=0; i<2*(num_total_rbs_log2-start); i++ ) {
- int index = start + (((start+i)>=num_total_rbs_log2) ? 2*(num_total_rbs_log2-start)-i-1 : i);
- Coordinate& c = ((i % 2) == 1) ? cx : cy;
- rb_equation[index].add(c);
- c++;
- }
-}
-
-//void getcheq( CoordEq& pipe_equation, CoordEq& addr, int pipe_interleave_log2, int num_pipes_log2,
-void
-RB_MAP::Get_Pipe_Equation( CoordEq& pipe_equation, CoordEq& addr,
- int pipe_interleave_log2,
- int num_pipes_log2,
-
- int block_size_log2,
- int num_samples_log2,
-
- int xmode, int data_type
- )
-{
- int pipe;
- CoordEq addr_f, xormask, xormask2;
- Coordinate tile_min( 'x', 3 );
-
- bool is_color = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR2D_LINEAR || data_type == DATA_COLOR3D_D_NOT_USED );
- bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
-
- // For color, filter out sample bits only
- // otherwise filter out everything under an 8x8 tile
- if( is_color )
- tile_min.set( 'x', 0 );
-
- addr.copy( addr_f );
-
- // Z/stencil is no longer tile split
- if( is_color )
- addr_f.shift( -num_samples_log2, block_size_log2- num_samples_log2 );
-
- int i;
- addr_f.copy( pipe_equation, pipe_interleave_log2, num_pipes_log2 ); //@todo kr needs num_ses_log2??
-
-
- // This section should only apply to z/stencil, maybe fmask
- // If the pipe bit is below the comp block size, then keep moving up the address until we find a bit that is above
- for( pipe=0; addr_f[pipe_interleave_log2 + pipe][0] < tile_min; pipe++ ) {
- }
-
- // if pipe is 0, then the first pipe bit is above the comp block size, so we don't need to do anything
- // Note, this if condition is not necessary, since if we execute the loop when pipe==0, we will get the same pipe equation
- if ( pipe != 0 ) {
- int j = pipe;
-
-
- for( i=0; i<num_pipes_log2; i++ ) {
- // Copy the jth bit above pipe interleave to the current pipe equation bit
- addr_f[pipe_interleave_log2 + j].copyto(pipe_equation[i]);
- j++;
-
-
- }
-
-
- }
-
- if( xmode == PRT ) {
- // Clear out bits above the block size if prt's are enabled
- addr_f.resize(block_size_log2);
- addr_f.resize(48);
- }
-
- if( xmode != NONE ) {
- if( is_thick ) {
- addr_f.copy( xormask2, pipe_interleave_log2+num_pipes_log2, 2*num_pipes_log2 );
-
- xormask.resize( num_pipes_log2 );
- for( pipe=0; pipe<num_pipes_log2; pipe++ ) {
- xormask[pipe].add( xormask2[2*pipe] );
- xormask[pipe].add( xormask2[2*pipe+1] );
- }
- } else {
- Coordinate co;
- // Xor in the bits above the pipe+gpu bits
- addr_f.copy( xormask, pipe_interleave_log2 + pipe + num_pipes_log2, num_pipes_log2 );
- if( num_samples_log2 == 0 && (xmode != PRT) ) {
- // if 1xaa and not prt, then xor in the z bits
- xormask2.resize(0);
- xormask2.resize(num_pipes_log2);
- for( pipe=0; pipe<num_pipes_log2; pipe++ ) {
- co.set( 'z', num_pipes_log2-1 - pipe );
- xormask2[pipe].add( co );
- }
-
- pipe_equation.xorin( xormask2 );
- }
- }
-
- xormask.reverse();
- pipe_equation.xorin( xormask );
-
- }
-}
-
-void RB_MAP::get_meta_miptail_coord( int& x, int& y, int& z, int mip_in_tail, int blk_width_log2, int blk_height_log2, int blk_depth_log2 )
-{
- bool is_thick = (blk_depth_log2>0);
- int m;
- int mip_width = 1 << blk_width_log2;
- int mip_height = 1 << (blk_height_log2-1);
- int mip_depth = 1 << blk_depth_log2;
-
- // Find the minimal increment, based on the block size and 2d/3d
- int min_inc;
- if(is_thick) {
- min_inc = (blk_height_log2 >= 9) ? 128 : ((blk_height_log2 == 8) ? 64 : 32);
- } else if(blk_height_log2>=10) {
- min_inc = 256;
- } else if(blk_height_log2==9) {
- min_inc = 128;
- } else {
- min_inc = 64;
- }
-
- for( m=0; m<mip_in_tail; m++ ) {
- if( mip_width <= 32 ) {
- // special case when below 32x32 mipmap
- switch(mip_in_tail-m) {
- case 0: break; // 32x32
- case 1: x+=32; break; // 16x16
- case 2: y+=32; break; // 8x8
- case 3: y+=32; x+=16; break;// 4x4
- case 4: y+=32; x+=32; break;// 2x2
- case 5: y+=32; x+=48; break;// 1x1
- // The following are for BC/ASTC formats
- case 6: y+=48; break; // 1/2 x 1/2
- case 7: y+=48; x+=16; break;// 1/4 x 1/4
- case 8: y+=48; x+=32; break;// 1/8 x 1/8
- default:y+=48; x+=48; break;// 1/16 x 1/16
- }
- m = mip_in_tail; // break the loop
- } else {
- if( mip_width <= min_inc ) {
- // if we're below the minimal increment...
- if( is_thick ) {
- // For 3d, just go in z direction
- z += mip_depth;
- } else {
- // For 2d, first go across, then down
- if( mip_width * 2 == min_inc ) {
- // if we're 2 mips below, that's when we go back in x, and down in y
- x -= min_inc;
- y += min_inc;
- } else {
- // otherwise, just go across in x
- x += min_inc;
- }
- }
- } else {
- // On even mip, go down, otherwise, go across
- if( m&1 ) {
- x += mip_width;
- } else {
- y += mip_height;
- }
- }
- // Divide the width by 2
- mip_width = mip_width / 2;
- // After the first mip in tail, the mip is always a square
- mip_height = mip_width;
- // ...or for 3d, a cube
- if(is_thick) mip_depth = mip_width;
- }
- }
-}
-
-void RB_MAP::get_mip_coord( int& x, int& y, int& z, int mip,
- int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2,
- int data_blk_width_log2, int data_blk_height_log2,
- int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip,
- int data_type, int bpp_log2, bool meta_linear )
-{
- if( meta_linear ) {
- get_mip_coord_linear( x, y, z, mip, data_blk_width_log2, data_blk_height_log2,
- surf_width, surf_height, surf_depth, epitch, max_mip, data_type, bpp_log2 );
- } else {
- get_mip_coord_nonlinear( x, y, z, mip, meta_blk_width_log2, meta_blk_height_log2, meta_blk_depth_log2,
- surf_width, surf_height, surf_depth, epitch, max_mip, data_type );
- }
-}
-
-void RB_MAP::get_mip_coord_linear( int& x, int& y, int& z,
- int mip,
- int data_blk_width_log2, int data_blk_height_log2,
- int& surf_width, int& surf_height, int& surf_depth, int epitch,
- int max_mip, int data_type, int bpp_log2
- )
-{
- bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
-
- if( data_linear ) {
- // linear width is padded out to 256 Bytes
- int width_padding = 8 - bpp_log2;
- int width_pad_mask = ~(0xffffffff << width_padding);
- int padded_surf_width = surf_width;
- int padded_surf_height = (data_type == DATA_COLOR1D) ? 1 : surf_height;
-
- if( max_mip > 0 ) {
- int mip_width = padded_surf_width;
- int mip_height = padded_surf_height;
- int padded_mip_height = 0;
- int mip_base = 0;
- int m = 0;
- while( (mip_width >= 1 || mip_height >= 1) && m <= max_mip ) {
- if( mip == m ) mip_base = padded_mip_height;
- padded_mip_height += mip_height;
- m++;
- mip_width = (mip_width / 2) + (mip_width & 1);
- mip_height = (mip_height / 2) + (mip_height & 1);
- }
- if( mip >= m ) {
- // assert error
- mip_base = padded_mip_height - mip_height;
- }
- padded_surf_height = padded_mip_height;
-
- if(epitch > 0){
- padded_surf_height = epitch;
- }
- y += mip_base;
- padded_surf_width = ((surf_width >> width_padding) + ((surf_width & width_pad_mask) ? 1 : 0)) << width_padding;
- }
- else{
- padded_surf_width = ((surf_width >> width_padding) + ((surf_width & width_pad_mask) ? 1 : 0)) << width_padding;
-
- // Pad up epitch to meta block width
- if( (epitch & width_pad_mask) != 0 ) {
- epitch = ((epitch >> width_padding) + 1) << width_padding;
- }
- // Take max of epitch and computed surf width
- if( epitch < padded_surf_width ) {
- // assert error
- } else {
- padded_surf_width = epitch;
- }
- }
-
- surf_width = padded_surf_width;
- surf_height = padded_surf_height;
- }
- else {
- // padding based data block size
- int width_pad_mask = ~(0xffffffff << data_blk_width_log2);
- int height_pad_mask = ~(0xffffffff << data_blk_height_log2);
-
- // Pad the data surface dimensions by the block dimensions, and put the result in compressed block dimension units
- surf_width = ((surf_width >> data_blk_width_log2) + ((surf_width & width_pad_mask) ? 1 : 0)) << data_blk_width_log2;
- surf_height = ((surf_height >> data_blk_height_log2) + ((surf_height & height_pad_mask) ? 1 : 0)) << data_blk_height_log2;
-
- // Tiled data, linear metadata
- if( max_mip > 0 ) {
- // we don't allow mipmapping on tiled data, with linear metadata
- // assert error
- }
-
- // Pad up epitch to data block width
- if( (epitch & width_pad_mask) != 0 ) {
- epitch = ((epitch >> data_blk_width_log2) + 1) << data_blk_width_log2;
- }
- // Take max of epitch and computed surf width
- if( epitch < surf_width ) {
- // assert error
- } else {
- surf_width = epitch;
- }
- }
-}
-
-void RB_MAP::get_mip_coord_nonlinear( int& x, int& y, int& z,
- int mip,
- int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2,
-
- // Outputs
- int& surf_width, int& surf_height, int& surf_depth,
-
- int epitch, int max_mip, int data_type
- )
-{
- bool is3d = (data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- int order; // 0 = xmajor, 1 = ymajor, 2 = zmajor
-
- int mip_width = surf_width;
- int mip_height = surf_height;
- int mip_depth = (is3d) ? surf_depth : 1;
-
- // Divide surface w/h/d by block size, padding if needed
- surf_width = (((surf_width & ((1<<meta_blk_width_log2 )-1)) != 0) ? 1 : 0) + (surf_width >> meta_blk_width_log2);
- surf_height = (((surf_height & ((1<<meta_blk_height_log2)-1)) != 0) ? 1 : 0) + (surf_height >> meta_blk_height_log2);
- surf_depth = (((surf_depth & ((1<<meta_blk_depth_log2 )-1)) != 0) ? 1 : 0) + (surf_depth >> meta_blk_depth_log2);
- epitch = (((epitch & ((1<<meta_blk_width_log2 )-1)) != 0) ? 1 : 0) + (epitch >> meta_blk_width_log2);
-
- if( max_mip > 0 ) {
- // Determine major order
- if( is3d && surf_depth > surf_width && surf_depth > surf_height ) {
- order = 2; // Z major
- }
- else if( surf_width >= surf_height ) {
- order = 0; // X major
- }
- else {
- order = 1; // Y major
- }
-
- // Check if mip 0 is in the tail
- bool in_tail = (mip_width <= (1<<meta_blk_width_log2)) &&
- (mip_height <= (1<<(meta_blk_height_log2-1))) &&
- (!is3d || (mip_depth <= (1<<meta_blk_depth_log2)));
- // Pad the mip w/h/d, which is just the surf w/h/d times blk dim
- mip_width = surf_width << meta_blk_width_log2;
- mip_height = surf_height << meta_blk_height_log2;
- mip_depth = surf_depth << meta_blk_depth_log2;
-
- if( !in_tail ) {
- // Select the dimension that stores the mip chain, based on major order
- // Then pad it out to max(2, ceil(mip_dim/2))
- int& mip_dim = (order == 1) ? surf_width : surf_height;
- // in y-major, if height > 2 blocks, then we need extra padding;
- // in x or z major, it only occurs if width/depth is greater than 4 blocks
- // Height is special, since we can enter the mip tail when height is 1/2 block high
- int order_dim_limit = (order == 1) ? 2 : 4;
- int& order_dim = (order == 0) ? surf_width : ((order == 1) ? surf_height : surf_depth);
- if( mip_dim < 3 && order_dim > order_dim_limit && max_mip >= 3 ) mip_dim += 2;
- else mip_dim += (mip_dim/2) + (mip_dim&1);
- }
-
- int m;
- for( m=0; m<mip; m++ ) {
- if( in_tail ) {
- get_meta_miptail_coord( x, y, z, mip-m, meta_blk_width_log2, meta_blk_height_log2, meta_blk_depth_log2 );
- m = mip; // break the loop
- } else {
- // Move either x, y, or z by the mip dimension based on which mip we're on and the order
- if(m>=3 || m&1) {
- switch(order) {
- case 0: x += mip_width; break;
- case 1: y += mip_height; break;
- case 2: z += mip_depth; break;
- }
- } else {
- switch(order) {
- case 0: y += mip_height; break;
- case 1: x += mip_width; break;
- case 2: y += mip_height; break;
- }
- }
- // Compute next mip's dimensions
- mip_width = (mip_width/2);
- mip_height = (mip_height/2);
- mip_depth = (mip_depth/2);
- // See if it's in the tail
- in_tail = (mip_width <= (1<<meta_blk_width_log2)) &&
- (mip_height <= (1<<(meta_blk_height_log2-1))) &&
- (!is3d || (mip_depth <= (1<<meta_blk_depth_log2)));
- // Pad out mip dimensions
- mip_width = ((mip_width >> meta_blk_width_log2) + ((mip_width & ((1<<meta_blk_width_log2) -1)) != 0)) << meta_blk_width_log2;
- mip_height = ((mip_height >> meta_blk_height_log2) + ((mip_height & ((1<<meta_blk_height_log2)-1)) != 0)) << meta_blk_height_log2;
- mip_depth = ((mip_depth >> meta_blk_depth_log2) + ((mip_depth & ((1<<meta_blk_depth_log2) -1)) != 0)) << meta_blk_depth_log2;
- }
- }
- } else {
- // Take max of epitch and computed surf width
- surf_width = (surf_width > epitch) ? surf_width : epitch;
- }
-
- // Multiply the surface dimension by block size
- surf_width = surf_width << meta_blk_width_log2;
- surf_height = surf_height << meta_blk_height_log2;
- surf_depth = surf_depth << meta_blk_depth_log2;
-
-}
-
-void
-RB_MAP::get_meta_eq( CoordEq& metaaddr,
- int max_mip, int num_ses_log2, int num_rbs_log2,
- int &num_pipes_log2,
- int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2,
- int pipe_interleave_log2,
- int xmode,
- int data_type,
- int meta_alignment, bool meta_linear)
-{
- // Metaaddressing
- Coordinate co;
- CoordEq cur_rbeq, pipe_equation, orig_pipe_equation;
-
- bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
- bool is_color = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- //bool is3d = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
-
- bool is_fmask = (data_type == DATA_FMASK);
- bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB);
- bool is_rb_aligned = (meta_alignment == META_ALIGN_RB) || (meta_alignment == META_ALIGN_PIPE_RB);
-
- bool is_mipmapped = (max_mip > 0) ? true : false;
-
- int pipe_mask = 0x0;
- int comp_frag_log2 = (is_color && (num_samples_log2 > max_comp_frag_log2)) ? max_comp_frag_log2 : num_samples_log2;
-
- int uncomp_frag_log2 = num_samples_log2 - comp_frag_log2;
-
- // Constraints on linear
- if ( data_linear ) {
- xmode = NONE;
- num_samples_log2 = 0;
- is_rb_aligned = false;
- meta_linear = true;
- }
- if( meta_linear && !data_linear ) {
- is_pipe_aligned = false;
- }
-
- // Min metablock size if thick is 64KB, otherwise 4KB
- int min_meta_block_size_log2 = (is_thick) ? 16 : 12;
-
- // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
- int metadata_word_size_log2 = (is_fmask) ? -1 : ((is_color) ? 0 : 2);
-
- int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2;
-
- // Get the total # of RB's before modifying due to rb align
- int num_total_rbs_pre_rb_align_log2 = num_ses_log2 + num_rbs_log2;
-
- // Cap the pipe bits to block size
- int num_ses_data_log2 = num_ses_log2;
- cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2,
- num_samples_log2, pipe_interleave_log2, block_size_log2, num_pipes_log2 );
-
- // if not pipe aligned, set num_pipes_log2, num_ses_log2 to 0
- if( !is_pipe_aligned ) {
- num_pipes_log2 = 0;
- num_ses_data_log2 = 0;
- }
-
- // Get the correct data address and rb equation
- CoordEq dataaddr;
- Get_Data_Offset_Equation( dataaddr,
- (meta_linear) ? DATA_COLOR1D : data_type,
- bpp_log2, num_samples_log2, block_size_log2 );
-
-
- // if not rb aligned, set num_ses_log2/rbs_log2 to 0; note, this is done after generating the data equation
- if( !is_rb_aligned ) {
- num_ses_log2 = 0;
- num_rbs_log2 = 0;
- }
-
- // Get pipe and rb equations
- Get_Pipe_Equation( pipe_equation, dataaddr, pipe_interleave_log2,
- num_pipes_log2, block_size_log2, num_samples_log2, xmode, data_type );
-
- CoordEq& this_rbeq = rb_equation[num_ses_log2][num_rbs_log2];
-
- num_pipes_log2 = pipe_equation.getsize();
-
- if( meta_linear ) {
- dataaddr.copy( metaaddr );
- if( data_linear ) {
- if( is_pipe_aligned ) {
- // Remove the pipe bits
- metaaddr.shift( -num_pipes_log2, pipe_interleave_log2 );
- }
- // Divide by comp block size, which for linear (which is always color) is 256 B
- metaaddr.shift( -8 );
- if( is_pipe_aligned ) {
- // Put pipe bits back in
- metaaddr.shift( num_pipes_log2, pipe_interleave_log2 );
- int i;
- for( i=0; i<num_pipes_log2; i++ ) {
- pipe_equation[i].copyto(metaaddr[pipe_interleave_log2+i]);
- }
- }
- }
- metaaddr.shift( 1 );
- return;
- }
-
- int i, j, k, old_size, new_size;
- int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;
-
- // For non-color surfaces, compessed block size is always 8x8; for color, it's always a 256 bytes sized region
- int comp_blk_width_log2 = 3, comp_blk_height_log2 = 3, comp_blk_depth_log2 = 0;
- int comp_blk_size_log2 = 8;
-
- // For color surfaces, compute the comp block width, height, and depth
- // For non-color surfaces, compute the comp block size
- if( is_color ) {
- Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2, &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 );
- metadata_words_per_page_log2 -= num_samples_log2; // factor out num fragments for color surfaces
- }
- else {
- comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2;
- }
-
- // Compute meta block width and height
- int num_comp_blks_per_meta_blk;
- if (num_pipes_log2==0 && num_ses_log2==0 && num_rbs_log2==0) {
- num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
- }
- else {
- num_comp_blks_per_meta_blk = num_total_rbs_pre_rb_align_log2 + ((is_thick) ? 18 : 10);
-
- if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > 27+bpp_log2)
- num_comp_blks_per_meta_blk = 27+bpp_log2 - comp_blk_size_log2;
-
- if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk )
- num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
- }
-
- int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2;
- Get_Meta_Block_Screen_Space( num_comp_blks_per_meta_blk, is_thick, is_mipmapped, // mipmaps should be y-biased
- comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
- meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2 );
-
- // Make sure the metaaddr is cleared
- metaaddr.resize(0);
- metaaddr.resize(27);
-
- //------------------------------------------------------------------------------------------------------------------------
- // Use the growing square or growing cube order for thick as a starting point for the metadata address
- //------------------------------------------------------------------------------------------------------------------------
- if( is_thick ) {
- Coordinate cx( 'x', 0 );
- Coordinate cy( 'y', 0 );
- Coordinate cz( 'z', 0 );
- if(is_mipmapped) {
- metaaddr.mort3d( cy, cx, cz );
- } else {
- metaaddr.mort3d( cx, cy, cz );
- }
- }
- else {
- Coordinate cx( 'x', 0 );
- Coordinate cy( 'y', 0 );
- Coordinate cs;
-
- if(is_mipmapped) {
- metaaddr.mort2d( cy, cx, comp_frag_log2 );
- } else {
- metaaddr.mort2d( cx, cy, comp_frag_log2 );
- }
-
- //------------------------------------------------------------------------------------------------------------------------
- // Put the compressible fragments at the lsb
- // the uncompressible frags will be at the msb of the micro address
- //------------------------------------------------------------------------------------------------------------------------
- int s;
- for( s=0; s<comp_frag_log2; s++ ) {
- cs.set( 's', s );
- metaaddr[s].add(cs);
- }
- }
-
- // Keep a copy of the pipe and rb equations
- this_rbeq.copy( cur_rbeq );
- pipe_equation.copy( orig_pipe_equation );
-
- // filter out everything under the compressed block size
- co.set( 'x', comp_blk_width_log2 );
- metaaddr.Filter( '<', co, 0, 'x' );
- co.set( 'y', comp_blk_height_log2 );
- metaaddr.Filter( '<', co, 0, 'y' );
- co.set( 'z', comp_blk_depth_log2 );
- metaaddr.Filter( '<', co, 0, 'z' );
- // For non-color, filter out sample bits
- if( !is_color ) {
- co.set( 'x', 0 );
- metaaddr.Filter( '<', co, 0, 's' );
- }
-
- // filter out everything above the metablock size
- co.set( 'x', meta_block_width_log2-1 );
- metaaddr.Filter( '>', co, 0, 'x' );
- co.set( 'y', meta_block_height_log2-1 );
- metaaddr.Filter( '>', co, 0, 'y' );
- co.set( 'z', meta_block_depth_log2-1 );
- metaaddr.Filter( '>', co, 0, 'z' );
-
- // filter out everything above the metablock size for the channel bits
- co.set( 'x', meta_block_width_log2-1 );
- pipe_equation.Filter( '>', co, 0, 'x' );
- co.set( 'y', meta_block_height_log2-1 );
- pipe_equation.Filter( '>', co, 0, 'y' );
- co.set( 'z', meta_block_depth_log2-1 );
- pipe_equation.Filter( '>', co, 0, 'z' );
-
- // Make sure we still have the same number of channel bits
- if( pipe_equation.getsize() != static_cast<UINT_32>(num_pipes_log2) ) {
- // assert
- }
-
- // Loop through all channel and rb bits, and make sure these components exist in the metadata address
- for( i=0; i<num_pipes_log2; i++ ) {
- for( j=pipe_equation[i].getsize()-1; j>=0; j-- ) {
- if( !metaaddr.Exists( pipe_equation[i][j] ) ) {
- // assert
- }
- }
- }
- for( i=0; i<num_total_rbs_log2; i++ ) {
- for( j=cur_rbeq[i].getsize()-1; j>=0; j-- ) {
- if( !metaaddr.Exists( cur_rbeq[i][j] ) ) {
- // assert
- }
- }
- }
-
- // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
- int old_rb_bits_left = num_total_rbs_log2;
- for( i=0; i<num_total_rbs_log2; i++ ) {
- for(j=0; j<num_pipes_log2; j++ ) {
- if( cur_rbeq[i] == pipe_equation[j] ) {
- cur_rbeq[i].Clear();
- old_rb_bits_left--;
- // Mark which pipe bit caused the RB bit to be dropped
- pipe_mask |= (1 << j);
- }
- }
- }
-
- // Loop through each bit of the channel, get the smallest coordinate, and remove it from the metaaddr, and rb_equation
- for( i=0; i<num_pipes_log2; i++ ) {
- pipe_equation[i].getsmallest( co );
-
- old_size = metaaddr.getsize();
- metaaddr.Filter( '=', co );
- new_size = metaaddr.getsize();
- if( new_size != old_size-1 ) {
- // assert warning
- }
- pipe_equation.remove( co );
- for( j=0; j<num_total_rbs_log2; j++ ) {
- if( cur_rbeq[j].remove( co ) ) {
- // if we actually removed something from this bit, then add the remaining
- // channel bits, as these can be removed for this bit
- for( k=0; (unsigned)k<pipe_equation[i].getsize(); k++ ) {
- if( pipe_equation[i][k] != co ) {
- cur_rbeq[j].add( pipe_equation[i][k] );
- }
- }
- // if the rb bit is still empty, then we have to mark all pipe bits as affecting the RB
- if( cur_rbeq[j].getsize() == 0 ) {
- pipe_mask = (1 << num_pipes_log2) - 1;
- }
- }
- }
- }
-
- // Loop through the rb bits and see what remain; filter out the smallest coordinate if it remains
- int rb_bits_left = 0;
- for( i=0; i<num_total_rbs_log2; i++ ) {
- if( cur_rbeq[i].getsize() > 0 ) {
- rb_bits_left++;
- cur_rbeq[i].getsmallest( co );
- old_size = metaaddr.getsize();
- metaaddr.Filter( '=', co );
- new_size = metaaddr.getsize();
- if( new_size != old_size-1 ) {
- // assert warning
- }
- for( j=i+1; j<num_total_rbs_log2; j++ ) {
- if( cur_rbeq[j].remove( co ) ) {
- // if we actually removed something from this bit, then add the remaining
- // rb bits, as these can be removed for this bit
- for( k=0; (unsigned)k<cur_rbeq[i].getsize(); k++ ) {
- if( cur_rbeq[i][k] != co ) {
- cur_rbeq[j].add( cur_rbeq[i][k] );
- }
- }
- }
- }
- }
- }
-
- // capture the size of the metaaddr
- i = metaaddr.getsize();
- // resize to 49 bits...make this a nibble address
- metaaddr.resize(49);
- // Concatenate the macro address above the current address
- for( j=0; i<49; i++, j++ ) {
- co.set( 'm', j );
- metaaddr[i].add( co );
- }
-
- // Multiply by meta element size (in nibbles)
- if( is_color ) {
- metaaddr.shift( 1 ); // Byte size element
- } else if( data_type == DATA_Z_STENCIL ) {
- metaaddr.shift( 3 ); // 4 Byte size elements
- }
-
- //------------------------------------------------------------------------------------------------------------------------
- // Note the pipe_interleave_log2+1 is because address is a nibble address
- // Shift up from pipe interleave number of channel and rb bits left, and uncompressed fragments
- //------------------------------------------------------------------------------------------------------------------------
-
- metaaddr.shift( num_pipes_log2 + rb_bits_left + uncomp_frag_log2,
- pipe_interleave_log2+1 );
-
- // Put in the channel bits
- for( i=0; i<num_pipes_log2; i++ ) {
- orig_pipe_equation[i].copyto( metaaddr[pipe_interleave_log2+1 + i] );
- }
-
- // Put in remaining rb bits
- i = 0;
- for( j=0; j<rb_bits_left; i=(i+1) % num_total_rbs_log2 ) {
- if( cur_rbeq[i].getsize() > 0 ) {
- rb_equation[num_ses_log2][num_rbs_log2][i].copyto( metaaddr[pipe_interleave_log2+1 + num_pipes_log2 + j] );
- // Mark any rb bit we add in to the rb mask
- j++;
- }
- }
-
- //------------------------------------------------------------------------------------------------------------------------
- // Put in the uncompressed fragment bits
- //------------------------------------------------------------------------------------------------------------------------
- for( i=0; i<uncomp_frag_log2; i++ ) {
- co.set( 's', comp_frag_log2+i );
- metaaddr[pipe_interleave_log2+1 + num_pipes_log2 + rb_bits_left + i].add( co );
- }
-
-
- //------------------------------------------------------------------------------------------------------------------------
- // Check that the metadata SE bits match the data address
- //------------------------------------------------------------------------------------------------------------------------
- for( i=0; i<num_ses_data_log2; i++ ) {
- if(num_total_rbs_log2-num_ses_data_log2+i >= 0){
- if( metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] != dataaddr[ pipe_interleave_log2 + num_pipes_log2-num_ses_data_log2 + i ] ||
- metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] != rb_equation[num_ses_log2][num_rbs_log2][num_total_rbs_log2-num_ses_data_log2+i]) {
- //FIXME: Removed to prevent logs from growing large in size // cout << "Warning: GPU bit " << i << " differs from data addr or RB equation on " << data_name << title << endl;
- //FIXME: Removed to prevent logs from growing large in size // cout << " Data: " << dataaddr[ pipe_interleave_log2 + num_pipes_log2-num_ses_data_log2 + i ] << endl;
- //FIXME: Removed to prevent logs from growing large in size // cout << "MData: " << metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] << endl;
- //FIXME: Removed to prevent logs from growing large in size // cout << " RBeq: " << rb_equation[num_ses_log2][num_rbs_log2][num_total_rbs_log2-num_ses_data_log2+i] << endl;
- //FIXME: Removed to prevent logs from growing large in size // cout << " Pipe: " << orig_pipe_equation << endl;
- //FIXME: Removed to prevent logs from growing large in size // cout << " DEq: " << dataaddr << endl;
- }
- }
- }
-}
-
-// Computes the metadata address, in nibbles, that corresponds to data
-// coordinate (x, y, z) and sample s.
-//
-// The metadata equation is built by get_meta_eq(), solved for the position
-// inside the meta block together with the linear index of that block, XORed
-// with the pipe XOR bits, and added to (surf_base << 1).
-//
-//   x, y, z, s            coordinate and sample being addressed
-//   surf_base             surface base; doubled here to form a nibble address
-//   element_bytes_log2    log2 of the element size in bytes
-//   pitch, slice          row / slice strides for the block index; scaled down
-//                         here to meta-block (or compressed-block) units
-//   max_mip               forced to 0 for tiled data with linear metadata
-//   pipe_xor              XOR value; only the low (capped) pipe bits are used
-//   x/y/z_mip_org         origin of the addressed mip, added to x/y/z
-//   l2_metablk_w/h/d      log2 meta block dimensions supplied by the caller
-//   meta_linear           forced to true when the data type is linear
-//   dim_type              not read by this function
-//   remaining parameters  forwarded to cap_pipe(), Get_Data_Offset_Equation()
-//                         and get_meta_eq()
-long
-RB_MAP::get_meta_addr_calc( int x, int y, int z, int s,
-                            long surf_base, int element_bytes_log2, int num_samples_log2, int max_comp_frag_log2,
-                            long pitch, long slice,
-                            int max_mip,
-                            int xmode, int pipe_xor, int block_size_log2,
-                            int num_pipes_log2,
-                            int pipe_interleave_log2,
-                            int meta_alignment,
-                            int dim_type,
-                            int x_mip_org, int y_mip_org, int z_mip_org,
-                            int num_ses_log2, int num_rbs_log2,
-                            int data_type,
-                            int l2_metablk_w, int l2_metablk_h, int l2_metablk_d,
-                            bool meta_linear )
-{
-    CoordEq metaaddr;
-
-    // Classify the surface from its data type.
-    const bool thick       = (data_type == DATA_COLOR3D_S) || (data_type == DATA_COLOR3D_Z);
-    const bool linear_data = (data_type == DATA_COLOR1D) || (data_type == DATA_COLOR2D_LINEAR);
-    const bool fmask       = (data_type == DATA_FMASK);
-    const bool color       = linear_data || thick ||
-                             (data_type == DATA_COLOR2D) || (data_type == DATA_COLOR3D_D_NOT_USED);
-
-    const bool pipe_aligned = (meta_alignment == META_ALIGN_PIPE_RB) || (meta_alignment == META_ALIGN_PIPE);
-    const bool rb_aligned   = (meta_alignment == META_ALIGN_PIPE_RB) || (meta_alignment == META_ALIGN_RB);
-
-    // Linear data always gets linear metadata; tiled data with linear metadata is not mipmapped.
-    if( linear_data ) {
-        meta_linear = true;
-    } else if( meta_linear ) {
-        max_mip = 0;
-    }
-
-    // Metadata words per minimum meta block: the block is 64KB when thick, otherwise 4KB;
-    // a word is 1/2 byte for fmask, 1 byte for color and 4 bytes for z/stencil.
-    int words_per_page_log2 = thick ? 16 : 12;
-    if( fmask ) {
-        words_per_page_log2 += 1;
-    } else if( !color ) {
-        words_per_page_log2 -= 2;
-    }
-
-    // cap_pipe() receives these three by reference, so work on copies of the inputs.
-    int ses_data_log2      = num_ses_log2;
-    int blk_size_data_log2 = block_size_log2;
-    int pipes_data_log2    = num_pipes_log2;
-    cap_pipe( xmode, thick, ses_data_log2, element_bytes_log2, num_samples_log2,
-              pipe_interleave_log2, blk_size_data_log2, pipes_data_log2 );
-
-    // Data address equation for the capped block size, then the metadata equation.
-    // get_meta_eq() takes num_pipes_log2 by reference.
-    CoordEq data_eq;
-    Get_Data_Offset_Equation( data_eq, data_type, element_bytes_log2, num_samples_log2, blk_size_data_log2 );
-
-    get_meta_eq( metaaddr, max_mip, num_ses_log2, num_rbs_log2, num_pipes_log2, block_size_log2,
-                 element_bytes_log2, num_samples_log2, max_comp_frag_log2, pipe_interleave_log2, xmode,
-                 data_type, meta_alignment, meta_linear );
-
-    // Compressed block: 8x8 for non-color surfaces, a 256 byte region for color.
-    int cb_w_log2 = 3;
-    int cb_h_log2 = 3;
-    int cb_d_log2 = 0;
-    int cb_size_log2;
-    if( color ) {
-        cb_size_log2 = 8;
-        Get_Comp_Block_Screen_Space( data_eq, cb_size_log2, &cb_w_log2, &cb_h_log2, &cb_d_log2 );
-        words_per_page_log2 -= num_samples_log2;   // factor out the fragments for color surfaces
-    } else {
-        cb_size_log2 = 6 + num_samples_log2 + element_bytes_log2;
-    }
-
-    // log2 number of compressed blocks per meta block.
-    // NOTE(review): this value is computed but never read below; the meta block
-    // dimensions come from l2_metablk_w/h/d instead.
-    const int  total_rbs_log2 = num_ses_log2 + num_rbs_log2;
-    const bool uses_pipes     = pipe_aligned && (num_pipes_log2 != 0);
-    const bool uses_rbs       = rb_aligned && ((num_ses_log2 != 0) || (num_rbs_log2 != 0));
-    int comp_blks_per_meta_blk_log2 = words_per_page_log2;
-    if( uses_pipes || uses_rbs ) {
-        int aligned_log2 = total_rbs_log2 + (thick ? COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2_3D
-                                                   : COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2_2D);
-        const int limit_log2 = 27 + element_bytes_log2 - cb_size_log2;
-        if( aligned_log2 > limit_log2 ) {
-            aligned_log2 = limit_log2;
-        }
-        if( aligned_log2 > comp_blks_per_meta_blk_log2 ) {
-            comp_blks_per_meta_blk_log2 = aligned_log2;
-        }
-    }
-
-    // Data block dimensions.
-    // NOTE(review): the three outputs are not read afterwards either.
-    int data_blk_w_log2, data_blk_h_log2, data_blk_d_log2;
-    Get_Meta_Block_Screen_Space( block_size_log2 - cb_size_log2, thick, true,
-                                 cb_w_log2, cb_h_log2, cb_d_log2,
-                                 data_blk_w_log2, data_blk_h_log2, data_blk_d_log2 );
-
-    // Surface-space coordinate, reduced to the units in which blocks are counted.
-    int meta_x = x_mip_org + x;
-    int meta_y = y_mip_org + y;
-    int meta_z = z_mip_org + z;
-
-    if( linear_data ) {
-        // Linear data (metadata is linear too): scale every axis by the element size.
-        meta_x <<= element_bytes_log2;
-        meta_y <<= element_bytes_log2;
-        meta_z <<= element_bytes_log2;
-    } else {
-        // Tiled data: count compressed blocks when the metadata is linear, meta blocks otherwise.
-        const int shift_x = meta_linear ? cb_w_log2 : l2_metablk_w;
-        const int shift_y = meta_linear ? cb_h_log2 : l2_metablk_h;
-        const int shift_z = meta_linear ? cb_d_log2 : l2_metablk_d;
-
-        meta_x >>= shift_x;
-        meta_y >>= shift_y;
-        meta_z >>= shift_z;
-
-        pitch >>= shift_x;
-        slice >>= (shift_x + shift_y);
-    }
-
-    const long macro_index = static_cast<long>(meta_x)
-                           + static_cast<long>(meta_y) * pitch
-                           + static_cast<long>(meta_z) * slice;
-
-    // Coordinate inside the meta block: the mip origin's offset within its block plus x/y/z.
-    const int in_blk_x = x + (x_mip_org & ((1 << l2_metablk_w) - 1));
-    const int in_blk_y = y + (y_mip_org & ((1 << l2_metablk_h) - 1));
-    const int in_blk_z = z + (z_mip_org & ((1 << l2_metablk_d) - 1));
-
-    // pipe_interleave_log2+1 because the equation produces nibble addresses.
-    const int  pipe_bits     = pipe_xor & ((1 << pipes_data_log2) - 1);
-    const long pipe_xor_mask = pipe_bits << (pipe_interleave_log2 + 1);
-
-    const long nibble_offset = metaaddr.solve( in_blk_x, in_blk_y, in_blk_z, s, macro_index );
-
-    // surf_base is doubled to turn it into a nibble address as well.
-    return (surf_base << 1) + (nibble_offset ^ pipe_xor_mask);
-}
-
-#if 0
- // Disabled (compiled out by the surrounding #if 0) variant of get_meta_addr_calc().
- // Unlike get_meta_addr_calc(), it takes a mip index and the surface dimensions,
- // derives the meta block dimensions itself via Get_Meta_Block_Screen_Space(),
- // and obtains the mip origin from get_mip_coord() instead of receiving both
- // as parameters. The result is the metadata address in nibbles.
- // NOTE(review): the declaration in rbmap.h names the ninth parameter "epitch"
- // while this definition calls it "lpitch".
-long
-RB_MAP::get_meta_addr( int x, int y, int z, int s, int mip,
- int surf_width, int surf_height, int surf_depth, int lpitch,
- long surf_base, int pipe_xor, int max_mip,
- int num_ses_log2, int num_rbs_log2, int num_pipes_log2,
- int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2,
- int pipe_interleave_log2, int xmode, int data_type, int meta_alignment, bool meta_linear)
-{
- CoordEq metaaddr;
-
- // Classify the surface from its data type
- bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
- bool is_color = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
- bool is_fmask = (data_type == DATA_FMASK);
-
- bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB);
- bool is_rb_aligned = (meta_alignment == META_ALIGN_RB) || (meta_alignment == META_ALIGN_PIPE_RB);
-
- // Captured before max_mip may be forced to 0 below
- bool is_mipmapped = (max_mip > 0) ? true : false;
-
- // Linear data always uses linear metadata
- if( data_linear ) meta_linear = true;
- // Don't allow mipmapping on the tiled data, meta linear case
- // or if we have linear 2d/3d surface
-
- #ifdef ADDRESS__LPITCH_DISABLE__0
- if( (!data_linear && meta_linear) || (data_type == DATA_COLOR2D_LINEAR) ) max_mip = 0;
- #else
- if( !data_linear && meta_linear) max_mip = 0;
- #endif
-
- // Min metablock size if thick is 64KB, otherwise 4KB
- int min_meta_block_size_log2 = (is_thick) ? 16 : 12;
-
-
- // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
- int metadata_word_size_log2 = (is_fmask) ? -1 : ((is_color) ? 0 : 2);
- int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2;
-
- // Cap the pipe bits to block size
- // (cap_pipe takes these three by reference, so copies of the inputs are passed)
- int num_ses_data_log2 = num_ses_log2;
- int block_size_data_log2 = block_size_log2;
- int num_pipes_data_log2 = num_pipes_log2;
-
- cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2, num_samples_log2, pipe_interleave_log2, block_size_data_log2, num_pipes_data_log2 );
-
- // Get the correct data address and rb equation
- CoordEq dataaddr;
- Get_Data_Offset_Equation( dataaddr, data_type, bpp_log2, num_samples_log2, block_size_data_log2 );
-
- // Build the metadata equation (get_meta_eq takes num_pipes_log2 by reference)
- get_meta_eq( metaaddr, max_mip, num_ses_log2, num_rbs_log2, num_pipes_log2, block_size_log2,
- bpp_log2, num_samples_log2, max_comp_frag_log2, pipe_interleave_log2, xmode, data_type,
- meta_alignment, meta_linear);
-
- // For non-color surfaces, compressed block size is always 8x8; for color, it's always a 256 bytes sized region
- int comp_blk_width_log2 = 3, comp_blk_height_log2 = 3, comp_blk_depth_log2 = 0;
- int comp_blk_size_log2 = 8;
-
- if ( is_color ) {
- Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2, &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 );
- metadata_words_per_page_log2 -= num_samples_log2; // factor out num fragments for color surfaces
- } else {
- comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2;
- }
-
- // Compute meta block width and height
- int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;
-
- // log2 count of compressed blocks per meta block: the minimum page's worth when
- // neither pipe nor RB alignment applies, otherwise scaled by the RB count, capped
- // at 27+bpp_log2 bits of data, and never less than the minimum page's worth
- int num_comp_blks_per_meta_blk;
- if((!is_pipe_aligned || num_pipes_log2==0) && (!is_rb_aligned || (num_ses_log2==0 && num_rbs_log2==0))) {
- num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
- }
- else {
- num_comp_blks_per_meta_blk = num_total_rbs_log2 + ((is_thick) ? 18 : 10);
-
- if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > 27+bpp_log2) num_comp_blks_per_meta_blk = 27+bpp_log2 - comp_blk_size_log2;
-
- if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk )
- num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
- }
-
- int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2;
-
-
- Get_Meta_Block_Screen_Space( num_comp_blks_per_meta_blk, is_thick, is_mipmapped,
- comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
- meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2 );
-
- // Get the data block size
- int data_block_width_log2, data_block_height_log2, data_block_depth_log2;
-
- Get_Meta_Block_Screen_Space( block_size_log2 - comp_blk_size_log2, is_thick, true,
- comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
- data_block_width_log2, data_block_height_log2, data_block_depth_log2 );
-
- int meta_x, meta_y, meta_z;
- int meta_surf_width = surf_width;
- int meta_surf_height = surf_height;
- int meta_surf_depth = surf_depth;
-
- // Origin of the requested mip; get_mip_coord also receives the meta_surf_*
- // dimensions by reference
- int mip_base_x=0, mip_base_y=0, mip_base_z=0;
- get_mip_coord( mip_base_x, mip_base_y, mip_base_z, mip,
- meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2,
- data_block_width_log2, data_block_height_log2,
- meta_surf_width, meta_surf_height, meta_surf_depth, lpitch, max_mip,
- data_type, bpp_log2, meta_linear );
-
- meta_x = mip_base_x + x;
- meta_y = mip_base_y + y;
- meta_z = mip_base_z + z;
-
- // Reduce the coordinate and the surface dimensions to the units in which blocks are counted
- if( meta_linear ) {
- if( !data_linear ) {
- // Tiled data, linear metadata
- meta_x = meta_x >> comp_blk_width_log2;
- meta_y = meta_y >> comp_blk_height_log2;
- meta_z = meta_z >> comp_blk_depth_log2;
- meta_surf_width = meta_surf_width >> comp_blk_width_log2;
- meta_surf_height = meta_surf_height >> comp_blk_height_log2;
- }
- else{
- // Linear data, linear metadata: scale by the element size
- meta_x = meta_x << bpp_log2;
- meta_y = meta_y << bpp_log2;
- meta_z = meta_z << bpp_log2;
- }
- } else {
- // Tiled metadata: count whole meta blocks
- meta_x = meta_x >> meta_block_width_log2;
- meta_y = meta_y >> meta_block_height_log2;
- meta_z = meta_z >> meta_block_depth_log2;
- meta_surf_width = meta_surf_width >> meta_block_width_log2;
- meta_surf_height = meta_surf_height >> meta_block_height_log2;
- }
-
- // Linear index of the block, using the reduced surface width/height as strides
- long macroaddr = (long)meta_x + (long)meta_y*(long)meta_surf_width + (long)meta_z*(long)meta_surf_width*(long)meta_surf_height;
-
- // Offset of the mip origin inside its meta block
- int mip_tail_x, mip_tail_y, mip_tail_z;
- mip_tail_x = mip_base_x & ((1 << meta_block_width_log2 )-1);
- mip_tail_y = mip_base_y & ((1 << meta_block_height_log2)-1);
- mip_tail_z = mip_base_z & ((1 << meta_block_depth_log2)-1);
-
- int mip_x = x + mip_tail_x;
- int mip_y = y + mip_tail_y;
- int mip_z = z + mip_tail_z;
-
- // the pipe_interleave_log2+1 is because we are dealing with nibble addresses
- long pipe_xor_mask = (pipe_xor & ((1 << num_pipes_data_log2)-1)) << (pipe_interleave_log2+1);
-
- // shift surf_base to make it a nibble address
- long address = (surf_base << 1) + (metaaddr.solve( mip_x, mip_y, mip_z, s, macroaddr ) ^ pipe_xor_mask);
-
- return address;
-}
-#endif
-
-// Fills the equation tables held by this object:
-//   rb_equation[num_ses_log2][num_rbs_log2]    via Get_RB_Equation()
-//   zaddr [pix_size_log2][num_samples_log2]    DATA_Z_STENCIL, block size log2 = 16
-//   caddr [pix_size_log2][num_samples_log2]    DATA_COLOR2D,   block size log2 = 16
-//   c3addr[pix_size_log2][0 / 1]               DATA_COLOR3D_S / DATA_COLOR3D_Z, one sample
-//
-// Every loop is bounded by the declared extent of the table it writes. The
-// previous literal bound of 5 on the first rb_equation index ran one row past
-// the [MAX_SES_LOG2+1][MAX_RBS_LOG2+1] array, which is undefined behaviour.
-void
-RB_MAP::Initialize()
-{
-    for( int se_log2=0; se_log2<=MAX_SES_LOG2; se_log2++ ) {
-        for( int rb_log2=0; rb_log2<=MAX_RBS_LOG2; rb_log2++ ) {
-            Get_RB_Equation( rb_equation[se_log2][rb_log2], se_log2, rb_log2 );
-        }
-    }
-
-    // Table extents taken from the declarations so the loops cannot drift from them.
-    const int z_pix_sizes    = static_cast<int>( sizeof(zaddr)     / sizeof(zaddr[0]) );
-    const int z_sample_cnts  = static_cast<int>( sizeof(zaddr[0])  / sizeof(zaddr[0][0]) );
-    const int c_pix_sizes    = static_cast<int>( sizeof(caddr)     / sizeof(caddr[0]) );
-    const int c_sample_cnts  = static_cast<int>( sizeof(caddr[0])  / sizeof(caddr[0][0]) );
-    const int c3_pix_sizes   = static_cast<int>( sizeof(c3addr)    / sizeof(c3addr[0]) );
-
-    for( int pix_log2=0; pix_log2<z_pix_sizes; pix_log2++ ) {
-        for( int samples_log2=0; samples_log2<z_sample_cnts; samples_log2++ ) {
-            Get_Data_Offset_Equation( zaddr[pix_log2][samples_log2], DATA_Z_STENCIL, pix_log2, samples_log2, 16 );
-        }
-    }
-
-    for( int pix_log2=0; pix_log2<c_pix_sizes; pix_log2++ ) {
-        for( int samples_log2=0; samples_log2<c_sample_cnts; samples_log2++ ) {
-            Get_Data_Offset_Equation( caddr[pix_log2][samples_log2], DATA_COLOR2D, pix_log2, samples_log2, 16 );
-        }
-    }
-
-    // Column 0 holds the DATA_COLOR3D_S equation, column 1 the DATA_COLOR3D_Z one.
-    for( int pix_log2=0; pix_log2<c3_pix_sizes; pix_log2++ ) {
-        Get_Data_Offset_Equation( c3addr[pix_log2][0], DATA_COLOR3D_S, pix_log2, 0, 16 );
-        Get_Data_Offset_Equation( c3addr[pix_log2][1], DATA_COLOR3D_Z, pix_log2, 0, 16 );
-    }
-}
-
diff --git a/src/amd/addrlib/gfx9/rbmap.h b/src/amd/addrlib/gfx9/rbmap.h
deleted file mode 100644
index 89c8922d3fe..00000000000
--- a/src/amd/addrlib/gfx9/rbmap.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright © 2017 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
- * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- */
-
-// The RB_MAP class contains the top-level calculation functions used to generate the RB id map based on RB id equations
-
-#ifndef __RB_MAP_H
-#define __RB_MAP_H
-
-#include "coord.h"
-
- // Holds precomputed CoordEq tables (filled by Initialize()) and the routines
- // that build data/metadata address equations and evaluate metadata addresses.
-class RB_MAP
-{
-public:
-
- // Largest log2 values covered by the rb_equation table, which is declared
- // as [MAX_SES_LOG2+1][MAX_RBS_LOG2+1]
- enum MAX_VALUES {
- MAX_SES_LOG2 = 3,
- MAX_RBS_LOG2 = 2
- };
-
- // Same values (10 and 18) that get_meta_addr_calc() adds to the total RB
- // count for non-thick and thick surfaces when sizing a meta block
- enum COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2 {
- COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2_2D = 10,
- COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2_3D = 18
- };
-
- RB_MAP(void);
-
- // Outputs are returned through w, h and the optional d. Callers in rbmap.cpp
- // pass a data address equation and a compressed block size (log2 bytes) and
- // read back log2 block dimensions.
- void Get_Comp_Block_Screen_Space( CoordEq& addr, int bytes_log2, int* w, int* h, int* d = NULL);
-
- // Outputs are the three meta_block_*_log2 references. The same routine is
- // also used by callers to obtain data block dimensions.
- void Get_Meta_Block_Screen_Space( int num_comp_blocks_log2, bool is_thick, bool y_biased,
- int comp_block_width_log2, int comp_block_height_log2, int comp_block_depth_log2,
- int& meta_block_width_log2, int& meta_block_height_log2, int& meta_block_depth_log2 );
- // num_ses_log2, block_size_log2 and num_pipes_log2 are in/out references;
- // callers pass copies of their inputs and use the returned values for the data equation.
- void cap_pipe( int xmode, bool is_thick, int& num_ses_log2, int bpp_log2, int num_samples_log2, int pipe_interleave_log2,
- int& block_size_log2, int& num_pipes_log2 );
-
- // Writes the data address equation for the given data type / element size /
- // sample count / block size into data_eq.
- void Get_Data_Offset_Equation( CoordEq& data_eq, int data_type, int bpp_log2, int num_samples_log2, int block_size_log2 );
-
- // Writes one RB equation; Initialize() uses it to fill the rb_equation table.
- void Get_RB_Equation( CoordEq& rb_equation, int num_ses_log2, int num_rbs_log2 );
-
- // Output is pipe_equation. NOTE(review): presumably derived from addr; the
- // definition is not shown alongside this declaration - confirm.
- void Get_Pipe_Equation( CoordEq& pipe_equation, CoordEq& addr,
- int pipe_interleave_log2,
- int num_pipes_log2,
- int block_size_log2,
- int num_samples_log2,
- int xmode, int data_type
- );
-
- // x, y, z are outputs (references).
- void get_meta_miptail_coord( int& x, int& y, int& z, int mip_in_tail, int blk_width_log2, int blk_height_log2, int blk_depth_log2 );
-
- // x, y, z and the surf_* dimensions are references and may be updated.
- // NOTE(review): presumably dispatches to the _linear / _nonlinear variants
- // below depending on meta_linear - confirm against the definition.
- void get_mip_coord( int& x, int& y, int& z, int mip,
- int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2,
- int data_blk_width_log2, int data_blk_height_log2,
- int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip,
- int data_type, int bpp_log2, bool meta_linear );
-
- void get_mip_coord_linear( int& x, int& y, int& z, int mip, int data_blk_width_log2, int data_blk_height_log2,
- int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip, int data_type, int bpp_log2 );
-
- void get_mip_coord_nonlinear( int& x, int& y, int& z, int mip, int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2,
- int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip, int data_type );
-
- // Builds the metadata address equation into metaaddr. num_pipes_log2 is
- // passed by reference.
- void get_meta_eq( CoordEq& metaaddr, int max_mip, int num_ses_log2, int num_rbs_log2, int &num_pipes_log2,
- int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2,
- int pipe_interleave_log2, int xmode, int data_type, int meta_alignment, bool meta_linear);
-
- // Disabled together with its definition in rbmap.cpp
-#if 0
- long get_meta_addr( int x, int y, int z, int s, int mip,
- int surf_width, int surf_height, int surf_depth, int epitch,
- long surf_base, int pipe_xor, int max_mip,
- int num_ses_log2, int num_rbs_log2, int num_pipes_log2,
- int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2,
- int pipe_interleave_log2, int xmode, int data_type, int meta_alignment, bool meta_linear);
-#endif
-
- // Returns the metadata address in nibbles for coordinate (x, y, z), sample s:
- // (surf_base << 1) plus the solved metadata equation XORed with the pipe XOR bits.
- // The meta block dimensions (l2_metablk_*) and the mip origin (*_mip_org)
- // are supplied by the caller. dim_type is not read by the definition.
- long get_meta_addr_calc( int x, int y, int z, int s,
- long surf_base, int element_bytes_log2, int num_samples_log2, int max_comp_frag_log2,
- long pitch, long slice,
- int max_mip,
- //int swizzle_mode,
- int xmode, int pipe_xor, int block_size_log2,
- /*int num_banks_log2,*/ int num_pipes_log2,
- int pipe_interleave_log2, int meta_alignment, int dim_type, int x_mip_org, int y_mip_org,
- int z_mip_org, int num_ses_log2, int num_rbs_log2, /*bool se_affinity_enable,*/ int data_type,
- int l2_metablk_w, int l2_metablk_h, int l2_metablk_d, bool meta_linear);
-
- // Fills rb_equation, zaddr, caddr and c3addr.
- void Initialize(void);
-
-public:
- // NOTE(review): presumably the values accepted by the "xmode" parameters
- // above - confirm against the callers.
- enum XOR_RANGE {
- NONE = 0,
- XOR = 1,
- PRT = 2
- };
-
-
- // Values of the "data_type" parameters. get_meta_addr_calc() treats
- // COLOR1D and COLOR2D_LINEAR as linear data, COLOR3D_S and COLOR3D_Z as
- // thick, and everything except Z_STENCIL and FMASK as color.
- enum DATA_TYPE_ENUM {
- DATA_COLOR1D,
- DATA_COLOR2D,
- DATA_COLOR3D_S,
- DATA_COLOR3D_Z,
- DATA_Z_STENCIL,
- DATA_FMASK,
- DATA_COLOR2D_LINEAR,
- DATA_COLOR3D_D_NOT_USED // should not be used; use COLOR2D instead
- };
-
- // Values of the "meta_alignment" parameters: PIPE and PIPE_RB select pipe
- // alignment, RB and PIPE_RB select RB alignment.
- enum META_ALIGNMENT {
- META_ALIGN_NONE,
- META_ALIGN_PIPE,
- META_ALIGN_RB,
- META_ALIGN_PIPE_RB
- };
-
- // Tables filled by Initialize():
- // rb_equation[num_ses_log2][num_rbs_log2]
- // zaddr [pix_size_log2][num_samples_log2] (DATA_Z_STENCIL)
- // caddr [pix_size_log2][num_samples_log2] (DATA_COLOR2D)
- // c3addr[pix_size_log2][0 = DATA_COLOR3D_S, 1 = DATA_COLOR3D_Z]
- CoordEq rb_equation[MAX_SES_LOG2+1][MAX_RBS_LOG2+1];
- CoordEq zaddr [4][4];
- CoordEq caddr [5][4];
- CoordEq c3addr[5][2];
-};
-
-#endif