summary | refs | log | tree | commit | diff | stats
path: root/src/amd/addrlib
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd/addrlib')
-rw-r--r-- src/amd/addrlib/addrinterface.cpp 4
-rw-r--r-- src/amd/addrlib/addrinterface.h 83
-rw-r--r-- src/amd/addrlib/amdgpu_asic_addr.h 129
-rw-r--r-- src/amd/addrlib/core/addrcommon.h 14
-rw-r--r-- src/amd/addrlib/core/addrelemlib.cpp 34
-rw-r--r-- src/amd/addrlib/core/addrlib.cpp 11
-rw-r--r-- src/amd/addrlib/core/addrlib.h 123
-rw-r--r-- src/amd/addrlib/core/addrlib1.cpp 73
-rw-r--r-- src/amd/addrlib/core/addrlib2.cpp 183
-rw-r--r-- src/amd/addrlib/core/addrlib2.h 86
-rw-r--r-- src/amd/addrlib/gfx9/coord.cpp 16
-rw-r--r-- src/amd/addrlib/gfx9/coord.h 16
-rw-r--r-- src/amd/addrlib/gfx9/gfx9addrlib.cpp 1084
-rw-r--r-- src/amd/addrlib/gfx9/gfx9addrlib.h 86
-rw-r--r-- src/amd/addrlib/gfx9/rbmap.cpp 1388
-rw-r--r-- src/amd/addrlib/gfx9/rbmap.h 142
-rw-r--r-- src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h 8
-rw-r--r-- src/amd/addrlib/inc/chip/r800/si_gb_reg.h 8
-rw-r--r-- src/amd/addrlib/inc/lnx_common_defs.h 129
-rw-r--r-- src/amd/addrlib/meson.build 4
-rw-r--r-- src/amd/addrlib/r800/ciaddrlib.cpp 47
-rw-r--r-- src/amd/addrlib/r800/ciaddrlib.h 38
-rw-r--r-- src/amd/addrlib/r800/egbaddrlib.cpp 656
-rw-r--r-- src/amd/addrlib/r800/siaddrlib.cpp 22
-rw-r--r-- src/amd/addrlib/r800/siaddrlib.h 44
25 files changed, 1324 insertions, 3104 deletions
diff --git a/src/amd/addrlib/addrinterface.cpp b/src/amd/addrlib/addrinterface.cpp
index 638556bd893..5fdf7fc3c65 100644
--- a/src/amd/addrlib/addrinterface.cpp
+++ b/src/amd/addrlib/addrinterface.cpp
@@ -534,11 +534,11 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
if (pLib != NULL)
{
- returnCode = pLib->ComputeDccInfo(pIn, pOut);
+ returnCode = pLib->ComputeDccInfo(pIn, pOut);
}
else
{
- returnCode = ADDR_ERROR;
+ returnCode = ADDR_ERROR;
}
return returnCode;
diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h
index f0da083af5d..8124b745f21 100644
--- a/src/amd/addrlib/addrinterface.h
+++ b/src/amd/addrlib/addrinterface.h
@@ -528,7 +528,7 @@ typedef union _ADDR_SURFACE_FLAGS
UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode
UINT_32 matchStencilTileCfg : 1; ///< Select tile index of stencil as well as depth surface
/// to make sure they share same tile config parameters
- UINT_32 reserved : 3; ///< Reserved bits
+ UINT_32 reserved : 2; ///< Reserved bits
};
UINT_32 value;
@@ -714,12 +714,6 @@ typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
};
UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE
};
-
-#if ADDR_AM_BUILD // These two fields are not valid in SW blt since no HTILE access
- UINT_32 addr5Swizzle; ///< ADDR5_SWIZZLE_MASK of DB_DEPTH_INFO
- BOOL_32 is32ByteTile; ///< Caller must have access to HTILE buffer and know if
- /// this tile is compressed to 32B
-#endif
} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT;
/**
@@ -857,8 +851,11 @@ typedef union _ADDR_HTILE_FLAGS
{
struct
{
- UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable
- UINT_32 reserved :31; ///< Reserved bits
+ UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable
+ UINT_32 skipTcCompatSizeAlign : 1; ///< Flag indicates that addrLib will not align htile
+ /// size to 256xBankxPipe when computing tc-compatible
+ /// htile info.
+ UINT_32 reserved : 30; ///< Reserved bits
};
UINT_32 value;
@@ -915,6 +912,9 @@ typedef struct _ADDR_COMPUTE_HTILE_INFO_OUTPUT
UINT_64 sliceSize; ///< Slice size, in bytes.
BOOL_32 sliceInterleaved; ///< Flag to indicate if different slice's htile is interleaved
/// Compute engine clear can't be used if htile is interleaved
+ BOOL_32 nextMipLevelCompressible; ///< Flag to indicate whether HTILE can be enabled in
+ /// next mip level, it also indicates if memory set based
+ /// fast clear can be used for current mip level.
} ADDR_COMPUTE_HTILE_INFO_OUTPUT;
/**
@@ -2188,7 +2188,6 @@ ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex(
-
/**
****************************************************************************************************
* ADDR_PRT_INFO_INPUT
@@ -2233,6 +2232,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo(
const ADDR_PRT_INFO_INPUT* pIn,
ADDR_PRT_INFO_OUTPUT* pOut);
+
+
////////////////////////////////////////////////////////////////////////////////////////////////////
// DCC key functions
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2293,6 +2294,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
ADDR_COMPUTE_DCCINFO_OUTPUT* pOut);
+
+
/**
****************************************************************************************************
* ADDR_GET_MAX_ALIGNMENTS_OUTPUT
@@ -2693,10 +2696,8 @@ typedef struct _ADDR2_META_MIP_INFO
struct
{
- UINT_32 offset; ///< metadata offset within one slice,
- /// the thickness of a slice is meta block depth.
- UINT_32 sliceSize; ///< metadata size within one slice,
- /// the thickness of a slice is meta block depth.
+ UINT_32 offset;
+ UINT_32 sliceSize;
};
};
} ADDR2_META_MIP_INFO;
@@ -2720,9 +2721,7 @@ typedef struct _ADDR2_COMPUTE_HTILE_INFO_INPUT
UINT_32 unalignedHeight; ///< Depth surface original height (of mip0)
UINT_32 numSlices; ///< Number of slices of depth surface (of mip0)
UINT_32 numMipLevels; ///< Total mipmap levels of color surface
- UINT_32 firstMipIdInTail; ///< id of the first mip in tail,
- /// if no mip is in tail, it should be set to
- /// number of mip levels
+ UINT_32 firstMipIdInTail;
} ADDR2_COMPUTE_HTILE_INFO_INPUT;
/**
@@ -3308,8 +3307,7 @@ typedef struct _ADDR2_COMPUTE_DCCINFO_INPUT
UINT_32 numMipLevels; ///< Total mipmap levels of color surface
UINT_32 dataSurfaceSize; ///< The padded size of all slices and mip levels
///< useful in meta linear case
- UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if no mip is in tail,
- /// it should be number of mip levels
+ UINT_32 firstMipIdInTail;
} ADDR2_COMPUTE_DCCINFO_INPUT;
/**
@@ -3339,8 +3337,13 @@ typedef struct _ADDR2_COMPUTE_DCCINFO_OUTPUT
UINT_32 metaBlkHeight; ///< DCC meta block height
UINT_32 metaBlkDepth; ///< DCC meta block depth
- UINT_32 fastClearSizePerSlice; ///< Size of DCC within a slice should be fast cleared
- UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice
+ UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice
+
+ union
+ {
+ UINT_32 fastClearSizePerSlice; ///< Size of DCC within a slice should be fast cleared
+ UINT_32 dccRamSliceSize;
+ };
ADDR2_META_MIP_INFO* pMipInfo; ///< DCC mip information
} ADDR2_COMPUTE_DCCINFO_OUTPUT;
@@ -3571,7 +3574,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern(
* ADDR2_BLOCK_SET
*
* @brief
-* Bit field that define block type
+* Bit field that defines block type
****************************************************************************************************
*/
typedef union _ADDR2_BLOCK_SET
@@ -3591,6 +3594,28 @@ typedef union _ADDR2_BLOCK_SET
/**
****************************************************************************************************
+* ADDR2_SWTYPE_SET
+*
+* @brief
+* Bit field that defines swizzle type
+****************************************************************************************************
+*/
+typedef union _ADDR2_SWTYPE_SET
+{
+ struct
+ {
+ UINT_32 sw_Z : 1; // SW_*_Z_*
+ UINT_32 sw_S : 1; // SW_*_S_*
+ UINT_32 sw_D : 1; // SW_*_D_*
+ UINT_32 sw_R : 1; // SW_*_R_*
+ UINT_32 reserved : 28;
+ };
+
+ UINT_32 value;
+} ADDR2_SWTYPE_SET;
+
+/**
+****************************************************************************************************
* ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
*
* @brief
@@ -3607,6 +3632,7 @@ typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
AddrResrouceLocation resourceLoction; ///< Surface heap choice
ADDR2_BLOCK_SET forbiddenBlock; ///< Client can use it to disable some block setting
///< such as linear for DXTn, tiled for YUV
+ ADDR2_SWTYPE_SET preferredSwSet; ///< Client can use it to specify sw type(s) wanted
BOOL_32 noXor; ///< Do not use xor mode for this resource
UINT_32 bpp; ///< bits per pixel
UINT_32 width; ///< Width (of mip0), in pixels
@@ -3632,12 +3658,15 @@ typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
*/
typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
{
- UINT_32 size; ///< Size of this structure in bytes
+ UINT_32 size; ///< Size of this structure in bytes
- AddrSwizzleMode swizzleMode; ///< Suggested swizzle mode to be used
- AddrResourceType resourceType; ///< Suggested resource type to program HW
- ADDR2_BLOCK_SET validBlockSet; ///< Valid block type bit conbination
- BOOL_32 canXor; ///< If client can use xor on a valid macro block type
+ AddrSwizzleMode swizzleMode; ///< Suggested swizzle mode to be used
+ AddrResourceType resourceType; ///< Suggested resource type to program HW
+ ADDR2_BLOCK_SET validBlockSet; ///< Valid block type bit combination
+ BOOL_32 canXor; ///< If client can use xor on a valid macro block
+ /// type
+ ADDR2_SWTYPE_SET validSwTypeSet; ///< Valid swizzle type bit combination
+ ADDR2_SWTYPE_SET clientPreferredSwSet; ///< Client-preferred swizzle type bit combination
} ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT;
/**
diff --git a/src/amd/addrlib/amdgpu_asic_addr.h b/src/amd/addrlib/amdgpu_asic_addr.h
new file mode 100644
index 00000000000..ea957a88b4d
--- /dev/null
+++ b/src/amd/addrlib/amdgpu_asic_addr.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+#ifndef _AMDGPU_ASIC_ADDR_H
+#define _AMDGPU_ASIC_ADDR_H
+
+#define ATI_VENDOR_ID 0x1002
+#define AMD_VENDOR_ID 0x1022
+
+// AMDGPU_VENDOR_IS_AMD(vendorId): nonzero when vendorId is ATI_VENDOR_ID or AMD_VENDOR_ID (v is evaluated twice)
+#define AMDGPU_VENDOR_IS_AMD(v) (((v) == ATI_VENDOR_ID) || ((v) == AMD_VENDOR_ID))
+
+#define FAMILY_UNKNOWN 0x00
+#define FAMILY_TN 0x69
+#define FAMILY_SI 0x6E
+#define FAMILY_CI 0x78
+#define FAMILY_KV 0x7D
+#define FAMILY_VI 0x82
+#define FAMILY_POLARIS 0x82
+#define FAMILY_CZ 0x87
+#define FAMILY_AI 0x8D
+#define FAMILY_RV 0x8E
+
+// FAMILY_IS(familyId, familyName): nonzero when familyId equals the FAMILY_<familyName> constant
+#define FAMILY_IS(f, fn) ((f) == FAMILY_##fn)
+#define FAMILY_IS_TN(f) FAMILY_IS(f, TN)
+#define FAMILY_IS_SI(f) FAMILY_IS(f, SI)
+#define FAMILY_IS_CI(f) FAMILY_IS(f, CI)
+#define FAMILY_IS_KV(f) FAMILY_IS(f, KV)
+#define FAMILY_IS_VI(f) FAMILY_IS(f, VI)
+#define FAMILY_IS_POLARIS(f) FAMILY_IS(f, POLARIS)
+#define FAMILY_IS_CZ(f) FAMILY_IS(f, CZ)
+#define FAMILY_IS_AI(f) FAMILY_IS(f, AI)
+#define FAMILY_IS_RV(f) FAMILY_IS(f, RV)
+
+#define AMDGPU_UNKNOWN 0xFF
+
+#define AMDGPU_TAHITI_RANGE 0x05, 0x14
+#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28
+#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C
+#define AMDGPU_OLAND_RANGE 0x3C, 0x46
+#define AMDGPU_HAINAN_RANGE 0x46, 0xFF
+
+#define AMDGPU_BONAIRE_RANGE 0x14, 0x28
+#define AMDGPU_HAWAII_RANGE 0x28, 0x3C
+
+#define AMDGPU_SPECTRE_RANGE 0x01, 0x41
+#define AMDGPU_SPOOKY_RANGE 0x41, 0x81
+#define AMDGPU_KALINDI_RANGE 0x81, 0xA1
+#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF
+
+#define AMDGPU_ICELAND_RANGE 0x01, 0x14
+#define AMDGPU_TONGA_RANGE 0x14, 0x28
+#define AMDGPU_FIJI_RANGE 0x3C, 0x50
+
+#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A
+#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64
+#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E
+
+#define AMDGPU_CARRIZO_RANGE 0x01, 0x21
+#define AMDGPU_BRISTOL_RANGE 0x10, 0x21
+#define AMDGPU_STONEY_RANGE 0x61, 0xFF
+
+#define AMDGPU_VEGA10_RANGE 0x01, 0x14
+
+#define AMDGPU_RAVEN_RANGE 0x01, 0x81
+
+// AMDGPU_IN_RANGE(val, min, max): nonzero when min <= val < max (max is exclusive; val is evaluated twice)
+#define AMDGPU_EXPAND_FIX(x) x
+#define AMDGPU_RANGE_HELPER(val, min, max) (((val) >= (min)) && ((val) < (max)))
+#define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__))
+
+// ASICREV_IS(eRevisionId, revisionName)
+#define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE)
+#define ASICREV_IS_TAHITI_P(r) ASICREV_IS(r, TAHITI)
+#define ASICREV_IS_PITCAIRN_PM(r) ASICREV_IS(r, PITCAIRN)
+#define ASICREV_IS_CAPEVERDE_M(r) ASICREV_IS(r, CAPEVERDE)
+#define ASICREV_IS_OLAND_M(r) ASICREV_IS(r, OLAND)
+#define ASICREV_IS_HAINAN_V(r) ASICREV_IS(r, HAINAN)
+
+#define ASICREV_IS_BONAIRE_M(r) ASICREV_IS(r, BONAIRE)
+#define ASICREV_IS_HAWAII_P(r) ASICREV_IS(r, HAWAII)
+
+#define ASICREV_IS_SPECTRE(r) ASICREV_IS(r, SPECTRE)
+#define ASICREV_IS_SPOOKY(r) ASICREV_IS(r, SPOOKY)
+#define ASICREV_IS_KALINDI(r) ASICREV_IS(r, KALINDI)
+#define ASICREV_IS_KALINDI_GODAVARI(r) ASICREV_IS(r, GODAVARI)
+
+#define ASICREV_IS_ICELAND_M(r) ASICREV_IS(r, ICELAND)
+#define ASICREV_IS_TONGA_P(r) ASICREV_IS(r, TONGA)
+#define ASICREV_IS_FIJI_P(r) ASICREV_IS(r, FIJI)
+
+#define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10)
+#define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11)
+#define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12)
+
+#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO)
+#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL)
+#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY)
+
+#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10)
+#define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10)
+
+#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN)
+
+#endif // _AMDGPU_ASIC_ADDR_H
diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h
index 8f5f1bfb374..62f8ac61618 100644
--- a/src/amd/addrlib/core/addrcommon.h
+++ b/src/amd/addrlib/core/addrcommon.h
@@ -36,15 +36,9 @@
#include "addrinterface.h"
-// ADDR_LNX_KERNEL_BUILD is for internal build
-// Moved from addrinterface.h so __KERNEL__ is not needed any more
-#if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__))
- #include "lnx_common_defs.h" // ported from cmmqs
-#elif !defined(__APPLE__) || defined(HAVE_TSERVER)
- #include <assert.h>
- #include <stdlib.h>
- #include <string.h>
-#endif
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
#if BRAHMA_BUILD && !defined(DEBUG)
#ifdef NDEBUG
@@ -171,6 +165,8 @@
#endif // DEBUG
////////////////////////////////////////////////////////////////////////////////////////////////////
+#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1]
+
namespace Addr
{
diff --git a/src/amd/addrlib/core/addrelemlib.cpp b/src/amd/addrlib/core/addrelemlib.cpp
index 4bc46e0f585..c9e6da4729a 100644
--- a/src/amd/addrlib/core/addrelemlib.cpp
+++ b/src/amd/addrlib/core/addrelemlib.cpp
@@ -1271,6 +1271,9 @@ VOID ElemLib::RestoreSurfaceInfo(
UINT_32 height;
UINT_32 bpp;
+ BOOL_32 bBCnFormat = FALSE;
+ (void)bBCnFormat;
+
ADDR_ASSERT(pBpp != NULL);
ADDR_ASSERT(pWidth != NULL && pHeight != NULL);
@@ -1289,22 +1292,17 @@ VOID ElemLib::RestoreSurfaceInfo(
break;
case ADDR_PACKED_GBGR:
case ADDR_PACKED_BGRG:
- if (m_pAddrLib->GetChipFamily() >= ADDR_CHIP_FAMILY_AI)
- {
- originalBits = bpp / expandX;
- }
- else
- {
- originalBits = bpp; // 32-bit packed ==> 2 32-bit result
- }
+ originalBits = bpp; // 32-bit packed ==> 2 32-bit result
break;
case ADDR_PACKED_BC1: // Fall through
case ADDR_PACKED_BC4:
originalBits = 64;
+ bBCnFormat = TRUE;
break;
case ADDR_PACKED_BC2: // Fall through
case ADDR_PACKED_BC3: // Fall through
case ADDR_PACKED_BC5:
+ bBCnFormat = TRUE;
// fall through
case ADDR_PACKED_ASTC:
case ADDR_PACKED_ETC2_128BPP:
@@ -1394,27 +1392,11 @@ UINT_32 ElemLib::GetBitsPerPixel(
break;
case ADDR_FMT_GB_GR: // treat as FMT_8_8
elemMode = ADDR_PACKED_GBGR;
- if (m_pAddrLib->GetChipFamily() >= ADDR_CHIP_FAMILY_AI)
- {
- bpp = 32;
- expandX = 2;
- }
- else
- {
- bpp = 16;
- }
+ bpp = 16;
break;
case ADDR_FMT_BG_RG: // treat as FMT_8_8
elemMode = ADDR_PACKED_BGRG;
- if (m_pAddrLib->GetChipFamily() >= ADDR_CHIP_FAMILY_AI)
- {
- bpp = 32;
- expandX = 2;
- }
- else
- {
- bpp = 16;
- }
+ bpp = 16;
break;
case ADDR_FMT_8_8_8_8:
case ADDR_FMT_2_10_10_10:
diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp
index 65fd3451a0d..a6ac5ecf836 100644
--- a/src/amd/addrlib/core/addrlib.cpp
+++ b/src/amd/addrlib/core/addrlib.cpp
@@ -218,7 +218,16 @@ ADDR_E_RETURNCODE Lib::Create(
}
break;
case CIASICIDGFXENGINE_ARCTICISLAND:
- pLib = Gfx9HwlInit(&client);
+ switch (pCreateIn->chipFamily)
+ {
+ case FAMILY_AI:
+ case FAMILY_RV:
+ pLib = Gfx9HwlInit(&client);
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
break;
default:
ADDR_ASSERT_ALWAYS();
diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h
index 20700844272..8db65a61c87 100644
--- a/src/amd/addrlib/core/addrlib.h
+++ b/src/amd/addrlib/core/addrlib.h
@@ -38,11 +38,7 @@
#include "addrobject.h"
#include "addrelemlib.h"
-#if BRAHMA_BUILD
-#include "amdgpu_id.h"
-#else
-#include "atiid.h"
-#endif
+#include "amdgpu_asic_addr.h"
#ifndef CIASICIDGFXENGINE_R600
#define CIASICIDGFXENGINE_R600 0x00000006
@@ -128,6 +124,123 @@ enum BankSwapSize
/**
****************************************************************************************************
+* @brief Enums that define max compressed fragments config
+****************************************************************************************************
+*/
+enum NumMaxCompressedFragmentsConfig
+{
+ ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS = 0x00000000,
+ ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS = 0x00000001,
+ ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS = 0x00000002,
+ ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS = 0x00000003,
+};
+
+/**
+****************************************************************************************************
+* @brief Enums that define num pipes config
+****************************************************************************************************
+*/
+enum NumPipesConfig
+{
+ ADDR_CONFIG_1_PIPE = 0x00000000,
+ ADDR_CONFIG_2_PIPE = 0x00000001,
+ ADDR_CONFIG_4_PIPE = 0x00000002,
+ ADDR_CONFIG_8_PIPE = 0x00000003,
+ ADDR_CONFIG_16_PIPE = 0x00000004,
+ ADDR_CONFIG_32_PIPE = 0x00000005,
+ ADDR_CONFIG_64_PIPE = 0x00000006,
+};
+
+/**
+****************************************************************************************************
+* @brief Enums that define num banks config
+****************************************************************************************************
+*/
+enum NumBanksConfig
+{
+ ADDR_CONFIG_1_BANK = 0x00000000,
+ ADDR_CONFIG_2_BANK = 0x00000001,
+ ADDR_CONFIG_4_BANK = 0x00000002,
+ ADDR_CONFIG_8_BANK = 0x00000003,
+ ADDR_CONFIG_16_BANK = 0x00000004,
+};
+
+/**
+****************************************************************************************************
+* @brief Enums that define num rb per shader engine config
+****************************************************************************************************
+*/
+enum NumRbPerShaderEngineConfig
+{
+ ADDR_CONFIG_1_RB_PER_SHADER_ENGINE = 0x00000000,
+ ADDR_CONFIG_2_RB_PER_SHADER_ENGINE = 0x00000001,
+ ADDR_CONFIG_4_RB_PER_SHADER_ENGINE = 0x00000002,
+};
+
+/**
+****************************************************************************************************
+* @brief Enums that define num shader engines config
+****************************************************************************************************
+*/
+enum NumShaderEnginesConfig
+{
+ ADDR_CONFIG_1_SHADER_ENGINE = 0x00000000,
+ ADDR_CONFIG_2_SHADER_ENGINE = 0x00000001,
+ ADDR_CONFIG_4_SHADER_ENGINE = 0x00000002,
+ ADDR_CONFIG_8_SHADER_ENGINE = 0x00000003,
+};
+
+/**
+****************************************************************************************************
+* @brief Enums that define pipe interleave size config
+****************************************************************************************************
+*/
+enum PipeInterleaveSizeConfig
+{
+ ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x00000000,
+ ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x00000001,
+ ADDR_CONFIG_PIPE_INTERLEAVE_1KB = 0x00000002,
+ ADDR_CONFIG_PIPE_INTERLEAVE_2KB = 0x00000003,
+};
+
+/**
+****************************************************************************************************
+* @brief Enums that define row size config
+****************************************************************************************************
+*/
+enum RowSizeConfig
+{
+ ADDR_CONFIG_1KB_ROW = 0x00000000,
+ ADDR_CONFIG_2KB_ROW = 0x00000001,
+ ADDR_CONFIG_4KB_ROW = 0x00000002,
+};
+
+/**
+****************************************************************************************************
+* @brief Enums that define bank interleave size config
+****************************************************************************************************
+*/
+enum BankInterleaveSizeConfig
+{
+ ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x00000000,
+ ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x00000001,
+ ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x00000002,
+ ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x00000003,
+};
+
+/**
+****************************************************************************************************
+* @brief Enums that define engine tile size config
+****************************************************************************************************
+*/
+enum ShaderEngineTileSizeConfig
+{
+ ADDR_CONFIG_SE_TILE_16 = 0x00000000,
+ ADDR_CONFIG_SE_TILE_32 = 0x00000001,
+};
+
+/**
+****************************************************************************************************
* @brief This class contains asic independent address lib functionalities
****************************************************************************************************
*/
diff --git a/src/amd/addrlib/core/addrlib1.cpp b/src/amd/addrlib/core/addrlib1.cpp
index 548b24b7b69..c796a63436c 100644
--- a/src/amd/addrlib/core/addrlib1.cpp
+++ b/src/amd/addrlib/core/addrlib1.cpp
@@ -1281,36 +1281,54 @@ ADDR_E_RETURNCODE Lib::ComputeHtileInfo(
if (returnCode == ADDR_OK)
{
- pOut->bpp = ComputeHtileInfo(pIn->flags,
- pIn->pitch,
- pIn->height,
- pIn->numSlices,
- pIn->isLinear,
- isWidth8,
- isHeight8,
- pIn->pTileInfo,
- &pOut->pitch,
- &pOut->height,
- &pOut->htileBytes,
- &pOut->macroWidth,
- &pOut->macroHeight,
- &pOut->sliceSize,
- &pOut->baseAlign);
-
- if (pIn->flags.tcCompatible && (pIn->numSlices > 1))
+ if (pIn->flags.tcCompatible)
{
- pOut->sliceSize = pIn->pitch * pIn->height * 4 / (8 * 8);
-
- const UINT_32 align = HwlGetPipes(pIn->pTileInfo) * pIn->pTileInfo->banks * m_pipeInterleaveBytes;
+ const UINT_32 sliceSize = pIn->pitch * pIn->height * 4 / (8 * 8);
+ const UINT_32 align = HwlGetPipes(pIn->pTileInfo) * pIn->pTileInfo->banks * m_pipeInterleaveBytes;
- if ((pOut->sliceSize % align) == 0)
+ if (pIn->numSlices > 1)
{
- pOut->sliceInterleaved = FALSE;
+ const UINT_32 surfBytes = (sliceSize * pIn->numSlices);
+
+ pOut->sliceSize = sliceSize;
+ pOut->htileBytes = pIn->flags.skipTcCompatSizeAlign ?
+ surfBytes : PowTwoAlign(surfBytes, align);
+ pOut->sliceInterleaved = ((sliceSize % align) != 0) ? TRUE : FALSE;
}
else
{
- pOut->sliceInterleaved = TRUE;
+ pOut->sliceSize = pIn->flags.skipTcCompatSizeAlign ?
+ sliceSize : PowTwoAlign(sliceSize, align);
+ pOut->htileBytes = pOut->sliceSize;
+ pOut->sliceInterleaved = FALSE;
}
+
+ pOut->nextMipLevelCompressible = ((sliceSize % align) == 0) ? TRUE : FALSE;
+
+ pOut->pitch = pIn->pitch;
+ pOut->height = pIn->height;
+ pOut->baseAlign = align;
+ pOut->macroWidth = 0;
+ pOut->macroHeight = 0;
+ pOut->bpp = 32;
+ }
+ else
+ {
+ pOut->bpp = ComputeHtileInfo(pIn->flags,
+ pIn->pitch,
+ pIn->height,
+ pIn->numSlices,
+ pIn->isLinear,
+ isWidth8,
+ isHeight8,
+ pIn->pTileInfo,
+ &pOut->pitch,
+ &pOut->height,
+ &pOut->htileBytes,
+ &pOut->macroWidth,
+ &pOut->macroHeight,
+ &pOut->sliceSize,
+ &pOut->baseAlign);
}
}
}
@@ -2162,6 +2180,8 @@ VOID Lib::HwlComputeXmaskCoordFromAddr(
{
UINT_32 pipe;
UINT_32 numPipes;
+ UINT_32 numGroupBits;
+ (void)numGroupBits;
UINT_32 numPipeBits;
UINT_32 macroTilePitch;
UINT_32 macroTileHeight;
@@ -2204,6 +2224,7 @@ VOID Lib::HwlComputeXmaskCoordFromAddr(
//
// Compute the number of group and pipe bits.
//
+ numGroupBits = Log2(m_pipeInterleaveBytes);
numPipeBits = Log2(numPipes);
UINT_32 groupBits = 8 * m_pipeInterleaveBytes;
@@ -3504,6 +3525,10 @@ VOID Lib::ComputeMipLevel(
ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure
) const
{
+ // Check if HWL has handled
+ BOOL_32 hwlHandled = FALSE;
+ (void)hwlHandled;
+
if (ElemLib::IsBlockCompressed(pIn->format))
{
if (pIn->mipLevel == 0)
@@ -3517,7 +3542,7 @@ VOID Lib::ComputeMipLevel(
}
}
- HwlComputeMipLevel(pIn);
+ hwlHandled = HwlComputeMipLevel(pIn);
}
/**
diff --git a/src/amd/addrlib/core/addrlib2.cpp b/src/amd/addrlib/core/addrlib2.cpp
index 57505d35af5..ddaf597f9dd 100644
--- a/src/amd/addrlib/core/addrlib2.cpp
+++ b/src/amd/addrlib/core/addrlib2.cpp
@@ -355,6 +355,11 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord(
{
returnCode = ComputeSurfaceAddrFromCoordTiled(&localIn, pOut);
}
+
+ if (returnCode == ADDR_OK)
+ {
+ pOut->prtBlockIndex = static_cast<UINT_32>(pOut->addr / (64 * 1024));
+ }
}
return returnCode;
@@ -460,8 +465,7 @@ ADDR_E_RETURNCODE Lib::ComputeHtileInfo(
*/
ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord(
const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
{
ADDR_E_RETURNCODE returnCode;
@@ -492,8 +496,7 @@ ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord(
*/
ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr(
const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
{
ADDR_E_RETURNCODE returnCode;
@@ -560,8 +563,7 @@ ADDR_E_RETURNCODE Lib::ComputeCmaskInfo(
*/
ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord(
const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
{
ADDR_E_RETURNCODE returnCode;
@@ -780,8 +782,7 @@ ADDR_E_RETURNCODE Lib::ComputeDccInfo(
*/
ADDR_E_RETURNCODE Lib::ComputeDccAddrFromCoord(
const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
{
ADDR_E_RETURNCODE returnCode;
@@ -1047,77 +1048,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoLinear(
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
) const
{
- ADDR_E_RETURNCODE returnCode = ADDR_OK;
-
- UINT_32 pitch = 0;
- UINT_32 actualHeight = 0;
- UINT_32 elementBytes = pIn->bpp >> 3;
- const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
-
- if (IsTex1d(pIn->resourceType))
- {
- if (pIn->height > 1)
- {
- returnCode = ADDR_INVALIDPARAMS;
- }
- else
- {
- const UINT_32 pitchAlignInElement = alignment / elementBytes;
- pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
- actualHeight = pIn->numMipLevels;
-
- if (pIn->flags.prt == FALSE)
- {
- returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
- &pitch, &actualHeight);
- }
-
- if (returnCode == ADDR_OK)
- {
- if (pOut->pMipInfo != NULL)
- {
- for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
- {
- pOut->pMipInfo[i].offset = pitch * elementBytes * i;
- pOut->pMipInfo[i].pitch = pitch;
- pOut->pMipInfo[i].height = 1;
- pOut->pMipInfo[i].depth = 1;
- }
- }
- }
- }
- }
- else
- {
- returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
- }
-
- if ((pitch == 0) || (actualHeight == 0))
- {
- returnCode = ADDR_INVALIDPARAMS;
- }
-
- if (returnCode == ADDR_OK)
- {
- pOut->pitch = pitch;
- pOut->height = pIn->height;
- pOut->numSlices = pIn->numSlices;
- pOut->mipChainPitch = pitch;
- pOut->mipChainHeight = actualHeight;
- pOut->mipChainSlice = pOut->numSlices;
- pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
- pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
- pOut->surfSize = pOut->sliceSize * pOut->numSlices;
- pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
- pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 * 8 / pIn->bpp);
- pOut->blockHeight = 1;
- pOut->blockSlices = 1;
- }
-
- // Post calculation validate
- ADDR_ASSERT(pOut->sliceSize > 0);
-
- return returnCode;
+ return HwlComputeSurfaceInfoLinear(pIn, pOut);
}
/**
@@ -1170,6 +1101,8 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear(
{
ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
+ ADDR2_MIP_INFO mipInfo[MaxMipLevels];
+
localIn.bpp = pIn->bpp;
localIn.flags = pIn->flags;
localIn.width = Max(pIn->unalignedWidth, 1u);
@@ -1177,32 +1110,21 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear(
localIn.numSlices = Max(pIn->numSlices, 1u);
localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
localIn.resourceType = pIn->resourceType;
+
if (localIn.numMipLevels <= 1)
{
localIn.pitchInElement = pIn->pitchInElement;
}
+
+ localOut.pMipInfo = mipInfo;
+
returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut);
if (returnCode == ADDR_OK)
{
- UINT_32 elementBytes = pIn->bpp >> 3;
- UINT_64 sliceOffsetInSurf = localOut.sliceSize * pIn->slice;
- UINT_64 mipOffsetInSlice = 0;
- UINT_64 offsetInMip = 0;
-
- if (IsTex1d(pIn->resourceType))
- {
- offsetInMip = static_cast<UINT_64>(pIn->x) * elementBytes;
- mipOffsetInSlice = static_cast<UINT_64>(pIn->mipId) * localOut.pitch * elementBytes;
- }
- else
- {
- UINT_64 mipStartHeight = SumGeo(localIn.height, pIn->mipId);
- mipOffsetInSlice = static_cast<UINT_64>(mipStartHeight) * localOut.pitch * elementBytes;
- offsetInMip = (pIn->y * localOut.pitch + pIn->x) * elementBytes;
- }
-
- pOut->addr = sliceOffsetInSurf + mipOffsetInSlice + offsetInMip;
+ pOut->addr = (localOut.sliceSize * pIn->slice) +
+ mipInfo[pIn->mipId].offset +
+ (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3);
pOut->bitPosition = 0;
}
else
@@ -1400,73 +1322,6 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrTiled(
/**
************************************************************************************************************************
-* Lib::ComputeSurfaceInfoLinear
-*
-* @brief
-* Internal function to calculate padding for linear swizzle 2D/3D surface
-*
-* @return
-* N/A
-************************************************************************************************************************
-*/
-ADDR_E_RETURNCODE Lib::ComputeSurfaceLinearPadding(
- const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
- UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
- UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
- ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
- ) const
-{
- ADDR_E_RETURNCODE returnCode = ADDR_OK;
-
- UINT_32 elementBytes = pIn->bpp >> 3;
- UINT_32 pitchAlignInElement = 0;
-
- if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
- {
- ADDR_ASSERT(pIn->numMipLevels <= 1);
- ADDR_ASSERT(pIn->numSlices <= 1);
- pitchAlignInElement = 1;
- }
- else
- {
- pitchAlignInElement = (256 / elementBytes);
- }
-
- UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
- UINT_32 slice0PaddedHeight = pIn->height;
-
- returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
- &mipChainWidth, &slice0PaddedHeight);
-
- if (returnCode == ADDR_OK)
- {
- UINT_32 mipChainHeight = 0;
- UINT_32 mipHeight = pIn->height;
-
- for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
- {
- if (pMipInfo != NULL)
- {
- pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
- pMipInfo[i].pitch = mipChainWidth;
- pMipInfo[i].height = mipHeight;
- pMipInfo[i].depth = 1;
- }
-
- mipChainHeight += mipHeight;
- mipHeight = RoundHalf(mipHeight);
- mipHeight = Max(mipHeight, 1u);
- }
-
- *pMipmap0PaddedWidth = mipChainWidth;
- *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
- }
-
- return returnCode;
-}
-
-/**
-************************************************************************************************************************
* Lib::ComputeBlockDimensionForSurf
*
* @brief
diff --git a/src/amd/addrlib/core/addrlib2.h b/src/amd/addrlib/core/addrlib2.h
index e98fddcd2d0..bea2a485a61 100644
--- a/src/amd/addrlib/core/addrlib2.h
+++ b/src/amd/addrlib/core/addrlib2.h
@@ -103,63 +103,63 @@ public:
// For data surface
ADDR_E_RETURNCODE ComputeSurfaceInfo(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
- ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr(
- const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
- ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
+ ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
// For HTile
ADDR_E_RETURNCODE ComputeHtileInfo(
const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeHtileAddrFromCoord(
- const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut);
ADDR_E_RETURNCODE ComputeHtileCoordFromAddr(
- const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
+ ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut);
// For CMask
ADDR_E_RETURNCODE ComputeCmaskInfo(
const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,
- ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord(
- const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut);
ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr(
- const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn,
- ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn,
+ ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const;
// For FMask
ADDR_E_RETURNCODE ComputeFmaskInfo(
- const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn,
- ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut);
+ const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn,
+ ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut);
ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord(
- const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr(
- const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
- ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
+ ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
// For DCC key
ADDR_E_RETURNCODE ComputeDccInfo(
const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
- ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeDccAddrFromCoord(
- const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut);
// Misc
ADDR_E_RETURNCODE ComputePipeBankXor(
@@ -197,6 +197,8 @@ protected:
static const UINT_32 PrtAlignment = 64 * 1024;
static const UINT_32 MaxMacroBits = 20;
+ static const UINT_32 MaxMipLevels = 16;
+
// Checking block size
BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const
{
@@ -402,32 +404,32 @@ protected:
}
virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord(
- const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const
+ const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
- const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const
+ const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut)
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
- const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const
+ const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut)
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr(
- const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const
+ const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
+ ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut)
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
@@ -532,6 +534,14 @@ protected:
return ADDR_NOTIMPLEMENTED;
}
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const
+ {
+ ADDR_NOT_IMPLEMENTED();
+ return ADDR_NOTIMPLEMENTED;
+ }
+
virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const
@@ -565,12 +575,6 @@ protected:
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
- ADDR_E_RETURNCODE ComputeSurfaceLinearPadding(
- const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
- UINT_32* pMipmap0PaddedWidth,
- UINT_32* pSlice0PaddedHeight,
- ADDR2_MIP_INFO* pMipInfo = NULL) const;
-
ADDR_E_RETURNCODE ComputeSurfaceInfoTiled(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
@@ -757,10 +761,10 @@ protected:
ADDR_E_RETURNCODE ApplyCustomizedPitchHeight(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
- UINT_32 elementBytes,
- UINT_32 pitchAlignInElement,
- UINT_32* pPitch,
- UINT_32* pHeight) const;
+ UINT_32 elementBytes,
+ UINT_32 pitchAlignInElement,
+ UINT_32* pPitch,
+ UINT_32* pHeight) const;
VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
diff --git a/src/amd/addrlib/gfx9/coord.cpp b/src/amd/addrlib/gfx9/coord.cpp
index effdc90017e..228d8f1872b 100644
--- a/src/amd/addrlib/gfx9/coord.cpp
+++ b/src/amd/addrlib/gfx9/coord.cpp
@@ -34,20 +34,20 @@ Coordinate::Coordinate()
ord = 0;
}
-Coordinate::Coordinate(INT_8 c, UINT_32 n)
+Coordinate::Coordinate(INT_8 c, INT_32 n)
{
- set(c,n);
+ set(c, n);
}
-VOID Coordinate::set(INT_8 c, UINT_32 n)
+VOID Coordinate::set(INT_8 c, INT_32 n)
{
dim = c;
ord = static_cast<INT_8>(n);
}
-UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m)
+UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const
{
- UINT_32 bit = 1 << (UINT_32)ord;
+ UINT_32 bit = static_cast<UINT_32>(1ull << static_cast<UINT_32>(ord));
UINT_32 out = 0;
switch (dim)
@@ -234,7 +234,7 @@ UINT_32 CoordTerm::getsize()
return num_coords;
}
-UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m)
+UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const
{
UINT_32 out = 0;
for (UINT_32 i = 0; i < num_coords; i++)
@@ -386,7 +386,7 @@ UINT_32 CoordEq::getsize()
return m_numBits;
}
-UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m)
+UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const
{
UINT_64 out = 0;
for (UINT_32 i = 0; i < m_numBits; i++)
@@ -401,7 +401,7 @@ UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m)
VOID CoordEq::solveAddr(
UINT_64 addr, UINT_32 sliceInM,
- UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m)
+ UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const
{
UINT_32 xBitsValid = 0;
UINT_32 yBitsValid = 0;
diff --git a/src/amd/addrlib/gfx9/coord.h b/src/amd/addrlib/gfx9/coord.h
index 28c57c17fe1..4243d3069a9 100644
--- a/src/amd/addrlib/gfx9/coord.h
+++ b/src/amd/addrlib/gfx9/coord.h
@@ -33,12 +33,12 @@ class Coordinate
{
public:
Coordinate();
- Coordinate(INT_8 c, UINT_32 n);
+ Coordinate(INT_8 c, INT_32 n);
- VOID set(INT_8 c, UINT_32 n);
- UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0);
- INT_8 getdim();
- INT_8 getord();
+ VOID set(INT_8 c, INT_32 n);
+ UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const;
+ INT_8 getdim();
+ INT_8 getord();
BOOL_32 operator==(const Coordinate& b);
BOOL_32 operator<(const Coordinate& b);
@@ -64,7 +64,7 @@ public:
BOOL_32 Exists(Coordinate& co);
VOID copyto(CoordTerm& cl);
UINT_32 getsize();
- UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0);
+ UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const;
VOID getsmallest(Coordinate& co);
UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0');
@@ -87,9 +87,9 @@ public:
BOOL_32 Exists(Coordinate& co);
VOID resize(UINT_32 n);
UINT_32 getsize();
- virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0);
+ virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const;
virtual VOID solveAddr(UINT_64 addr, UINT_32 sliceInM,
- UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m);
+ UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const;
VOID copy(CoordEq& o, UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF);
VOID reverse(UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF);
diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
index edb4c6e636a..e06f13c0afe 100644
--- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
@@ -34,14 +34,8 @@
#include "gfx9addrlib.h"
#include "gfx9_gb_reg.h"
-#include "gfx9_enum.h"
-#if BRAHMA_BUILD
-#include "amdgpu_id.h"
-#else
-#include "ai_id.h"
-#include "rv_id.h"
-#endif
+#include "amdgpu_asic_addr.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -183,7 +177,14 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
}
else
{
- numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ if (m_settings.applyAliasFix)
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
+ }
+ else
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ }
}
numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
@@ -222,6 +223,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
+ if (m_settings.htileAlignFix)
+ {
+ sizeAlign <<= 1;
+ }
+
pOut->pitch = numMetaBlkX * metaBlkDim.w;
pOut->height = numMetaBlkY * metaBlkDim.h;
pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
@@ -284,7 +290,14 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
}
else
{
- numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ if (m_settings.applyAliasFix)
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
+ }
+ else
+ {
+ numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
+ }
numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
}
@@ -569,8 +582,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
if ((numPipeTotal > 1) || (numRbTotal > 1))
{
+ const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
+
numCompressBlkPerMetaBlk =
- Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : 1024));
+ Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
{
@@ -685,8 +700,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
{
ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
input.size = sizeof(input);
@@ -710,11 +724,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
- CoordEq metaEq;
-
- GetMetaEquation(&metaEq, 0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
- Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
+ const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
+ Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
@@ -724,7 +736,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
+ UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
pOut->addr = address >> 1;
pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
@@ -754,8 +766,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;
@@ -787,11 +798,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
- CoordEq metaEq;
-
- GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
- Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
+ const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
+ Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
@@ -801,7 +810,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
+ UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
pOut->addr = address >> 1;
@@ -830,8 +839,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
- ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure
- ) const
+ ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;
@@ -862,11 +870,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
- CoordEq metaEq;
-
- GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
- Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0);
+ const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
+ Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
pIn->swizzleMode);
@@ -879,7 +885,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 x, y, z, s, m;
- metaEq.solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
+ pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
pOut->slice = m / sliceSizeInBlock;
pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
@@ -903,7 +909,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;
@@ -942,12 +948,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
- CoordEq metaEq;
-
- GetMetaEquation(&metaEq, pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
- Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
- compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2);
+ const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
+ Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
+ compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2});
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
@@ -957,7 +961,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
+ UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
pOut->addr = address >> 1;
@@ -1184,16 +1188,18 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
m_settings.isArcticIsland = 1;
m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
- if (m_settings.isVega10)
+ m_settings.isDce12 = 1;
+
+ if (m_settings.isVega10 == 0)
{
- m_settings.isDce12 = 1;
+ m_settings.htileAlignFix = 1;
+ m_settings.applyAliasFix = 1;
}
m_settings.metaBaseAlignFix = 1;
m_settings.depthPipeXorDisable = 1;
break;
-
case FAMILY_RV:
m_settings.isArcticIsland = 1;
m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision);
@@ -1205,7 +1211,10 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
m_settings.metaBaseAlignFix = 1;
- m_settings.depthPipeXorDisable = 1;
+ if (ASICREV_IS_RAVEN(uChipRevision))
+ {
+ m_settings.depthPipeXorDisable = 1;
+ }
break;
default:
@@ -1230,6 +1239,7 @@ VOID Gfx9Lib::GetRbEquation(
CoordEq* pRbEq, ///< [out] rb equation
UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
UINT_32 numSeLog2) ///< [in] number of shader engine
+ const
{
// RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
@@ -1250,6 +1260,12 @@ VOID Gfx9Lib::GetRbEquation(
(*pRbEq)[0].add(cy);
cx++;
cy++;
+
+ if (m_settings.applyAliasFix == false)
+ {
+ (*pRbEq)[0].add(cy);
+ }
+
(*pRbEq)[0].add(cy);
start++;
}
@@ -1583,7 +1599,6 @@ VOID Gfx9Lib::GetPipeEquation(
pPipeEq->xorin(xorMask);
}
}
-
/**
************************************************************************************************************************
* Gfx9Lib::GetMetaEquation
@@ -1591,29 +1606,86 @@ VOID Gfx9Lib::GetPipeEquation(
* @brief
* Get meta equation for cmask/htile/DCC
* @return
+* Pointer to a calculated meta equation
+************************************************************************************************************************
+*/
+const CoordEq* Gfx9Lib::GetMetaEquation(
+ const MetaEqParams& metaEqParams)
+{
+ UINT_32 cachedMetaEqIndex;
+
+ for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
+ {
+ if (memcmp(&metaEqParams,
+ &m_cachedMetaEqKey[cachedMetaEqIndex],
+ static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
+ {
+ break;
+ }
+ }
+
+ CoordEq* pMetaEq = NULL;
+
+ if (cachedMetaEqIndex < MaxCachedMetaEq)
+ {
+ pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
+ }
+ else
+ {
+ m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
+
+ pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
+
+ m_metaEqOverrideIndex %= MaxCachedMetaEq;
+
+ GenMetaEquation(pMetaEq,
+ metaEqParams.maxMip,
+ metaEqParams.elementBytesLog2,
+ metaEqParams.numSamplesLog2,
+ metaEqParams.metaFlag,
+ metaEqParams.dataSurfaceType,
+ metaEqParams.swizzleMode,
+ metaEqParams.resourceType,
+ metaEqParams.metaBlkWidthLog2,
+ metaEqParams.metaBlkHeightLog2,
+ metaEqParams.metaBlkDepthLog2,
+ metaEqParams.compBlkWidthLog2,
+ metaEqParams.compBlkHeightLog2,
+ metaEqParams.compBlkDepthLog2);
+ }
+
+ return pMetaEq;
+}
+
+/**
+************************************************************************************************************************
+* Gfx9Lib::GenMetaEquation
+*
+* @brief
+* Generate meta equation for cmask/htile/DCC
+* @return
* N/A
************************************************************************************************************************
*/
-VOID Gfx9Lib::GetMetaEquation(
- CoordEq* pMetaEq, ///< [out] meta equation
- UINT_32 maxMip, ///< [in] max mip Id
- UINT_32 elementBytesLog2, ///< [in] data surface element bytes
- UINT_32 numSamplesLog2, ///< [in] data surface sample count
- ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
- Gfx9DataType dataSurfaceType, ///< [in] data surface type
- AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
- AddrResourceType resourceType, ///< [in] data surface resource type
- UINT_32 metaBlkWidthLog2, ///< [in] meta block width
- UINT_32 metaBlkHeightLog2, ///< [in] meta block height
- UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
- UINT_32 compBlkWidthLog2, ///< [in] compress block width
- UINT_32 compBlkHeightLog2, ///< [in] compress block height
- UINT_32 compBlkDepthLog2) ///< [in] compress block depth
+VOID Gfx9Lib::GenMetaEquation(
+ CoordEq* pMetaEq, ///< [out] meta equation
+ UINT_32 maxMip, ///< [in] max mip Id
+ UINT_32 elementBytesLog2, ///< [in] data surface element bytes
+ UINT_32 numSamplesLog2, ///< [in] data surface sample count
+ ADDR2_META_FLAGS metaFlag, ///< [in] meta flag
+ Gfx9DataType dataSurfaceType, ///< [in] data surface type
+ AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
+ AddrResourceType resourceType, ///< [in] data surface resource type
+ UINT_32 metaBlkWidthLog2, ///< [in] meta block width
+ UINT_32 metaBlkHeightLog2, ///< [in] meta block height
+ UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
+ UINT_32 compBlkWidthLog2, ///< [in] compress block width
+ UINT_32 compBlkHeightLog2, ///< [in] compress block height
+ UINT_32 compBlkDepthLog2) ///< [in] compress block depth
const
{
- UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
+ UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
- //UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
// Get the correct data address and rb equation
CoordEq dataEq;
@@ -1769,16 +1841,15 @@ VOID Gfx9Lib::GetMetaEquation(
}
}
- UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
- UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
- CoordEq origRbEquation;
+ const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
+ const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
+ const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
+ CoordEq origRbEquation;
GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
CoordEq rbEquation = origRbEquation;
- UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
-
for (UINT_32 i = 0; i < numRbTotalLog2; i++)
{
for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
@@ -1790,18 +1861,41 @@ VOID Gfx9Lib::GetMetaEquation(
}
}
+ if (m_settings.applyAliasFix)
+ {
+ co.set('z', -1);
+ }
+
// Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
for (UINT_32 i = 0; i < numRbTotalLog2; i++)
{
for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
{
- if (rbEquation[i] == pipeEquation[j])
+ BOOL_32 isRbEquationInPipeEquation = FALSE;
+
+ if (m_settings.applyAliasFix)
+ {
+ CoordTerm filteredPipeEq;
+ filteredPipeEq = pipeEquation[j];
+
+ filteredPipeEq.Filter('>', co, 0, 'z');
+
+ isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
+ }
+ else
+ {
+ isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
+ }
+
+ if (isRbEquationInPipeEquation)
{
rbEquation[i].Clear();
}
}
}
+ bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
+
// Loop through each bit of the channel, get the smallest coordinate,
// and remove it from the metaaddr, and rb_equation
for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
@@ -1827,6 +1921,7 @@ VOID Gfx9Lib::GetMetaEquation(
if (pipeEquation[i][k] != co)
{
rbEquation[j].add(pipeEquation[i][k]);
+ rbAppendedWithPipeBits[j] = true;
}
}
}
@@ -1838,7 +1933,18 @@ VOID Gfx9Lib::GetMetaEquation(
UINT_32 rbBitsLeft = 0;
for (UINT_32 i = 0; i < numRbTotalLog2; i++)
{
- if (rbEquation[i].getsize() > 0)
+ BOOL_32 isRbEqAppended = FALSE;
+
+ if (m_settings.applyAliasFix)
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
+ }
+ else
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > 0);
+ }
+
+ if (isRbEqAppended)
{
rbBitsLeft++;
rbEquation[i].getsmallest(co);
@@ -1860,6 +1966,7 @@ VOID Gfx9Lib::GetMetaEquation(
if (rbEquation[i][k] != co)
{
rbEquation[j].add(rbEquation[i][k]);
+ rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
}
}
}
@@ -1905,7 +2012,18 @@ VOID Gfx9Lib::GetMetaEquation(
// Put in remaining rb bits
for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
{
- if (rbEquation[i].getsize() > 0)
+ BOOL_32 isRbEqAppended = FALSE;
+
+ if (m_settings.applyAliasFix)
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
+ }
+ else
+ {
+ isRbEqAppended = (rbEquation[i].getsize() > 0);
+ }
+
+ if (isRbEqAppended)
{
origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
// Mark any rb bit we add in to the rb mask
@@ -2717,7 +2835,8 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
{
BOOL_32 support = FALSE;
- //const AddrResourceType resourceType = pIn->resourceType;
+ const AddrResourceType resourceType = pIn->resourceType;
+ (void)resourceType;
const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
if (m_settings.isDce12)
@@ -3059,6 +3178,16 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
};
+ enum AddrSwSet
+ {
+ AddrSwSetZ = 1 << ADDR_SW_Z,
+ AddrSwSetS = 1 << ADDR_SW_S,
+ AddrSwSetD = 1 << ADDR_SW_D,
+ AddrSwSetR = 1 << ADDR_SW_R,
+
+ AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
+ };
+
ADDR_E_RETURNCODE returnCode = ADDR_OK;
ElemLib* pElemLib = GetElemLib();
@@ -3109,10 +3238,13 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
pOut->resourceType = pIn->resourceType;
}
- ADDR_ASSERT(bpp >= 8u);
- UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
+ if (bpp < 8)
+ {
+ ADDR_ASSERT_ALWAYS();
- if (IsTex1d(pOut->resourceType))
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else if (IsTex1d(pOut->resourceType))
{
pOut->swizzleMode = ADDR_SW_LINEAR;
pOut->validBlockSet.value = AddrBlockSetLinear;
@@ -3123,7 +3255,15 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
ADDR2_BLOCK_SET blockSet;
blockSet.value = 0;
- AddrSwType swType = ADDR_SW_S;
+ ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet;
+ addrPreferredSwSet.value = AddrSwSetS;
+ addrValidSwSet = addrPreferredSwSet;
+ clientPreferredSwSet = pIn->preferredSwSet;
+
+ if (clientPreferredSwSet.value == 0)
+ {
+ clientPreferredSwSet.value = AddrSwSetAll;
+ }
// prt Xor and non-xor will have less height align requirement for stereo surface
BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
@@ -3135,8 +3275,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
{
ADDR_ASSERT(IsTex2d(pOut->resourceType));
- blockSet.value = AddrBlockSetMacro;
- swType = ADDR_SW_Z;
+ blockSet.value = AddrBlockSetMacro;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ;
if (pIn->flags.depth && pIn->flags.texture)
{
@@ -3153,9 +3294,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
}
else if (ElemLib::IsBlockCompressed(pIn->format))
{
- // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. Not sure
- // under what circumstances "_D" would be appropriate as these formats are not
- // displayable.
+ // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.
+ // Not sure under what circumstances "_D" would be appropriate as these formats
+ // are not displayable.
blockSet.value = AddrBlockSetMacro;
// This isn't to be used as texture and caller doesn't allow macro tiled.
@@ -3164,15 +3305,19 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
{
blockSet.value |= AddrBlockSetLinear;
}
- swType = ADDR_SW_D;
+
+ addrPreferredSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
}
else if (ElemLib::IsMacroPixelPacked(pIn->format))
{
- // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. Its not
- // clear under what circumstances the D or R modes would be appropriate since
- // these formats are not displayable.
- blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
- swType = ADDR_SW_S;
+ // macro pixel packed formats (BG_RG, GB_GR) do not support the Z modes.
+ // It's not clear under what circumstances the D or R modes would be appropriate
+ // since these formats are not displayable.
+ blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
+
+ addrPreferredSwSet.value = AddrSwSetS;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR;
}
else if (IsTex3d(pOut->resourceType))
{
@@ -3181,28 +3326,38 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if (pIn->flags.prt)
{
// PRT cannot use SW_D which gives an unexpected block dimension
- swType = ADDR_SW_Z;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetS;
}
else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
{
// When depth (Z) is the maximum dimension then must use one of the SW_*_S
// or SW_*_Z modes if mipmapping is desired on a 3D surface
- swType = ADDR_SW_Z;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetS;
}
else if (pIn->flags.color)
{
- swType = ADDR_SW_D;
+ addrPreferredSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetS | AddrSwSetD;
}
else
{
- swType = ADDR_SW_Z;
+ addrPreferredSwSet.value = AddrSwSetZ;
+ addrValidSwSet.value = AddrSwSetZ | AddrSwSetD;
+ if (bpp != 128)
+ {
+ addrValidSwSet.value |= AddrSwSetS;
+ }
}
}
else
{
- swType = ((pIn->flags.display == TRUE) ||
- (pIn->flags.overlay == TRUE) ||
- (pIn->bpp == 128)) ? ADDR_SW_D : ADDR_SW_S;
+ addrPreferredSwSet.value = ((pIn->flags.display == TRUE) ||
+ (pIn->flags.overlay == TRUE) ||
+ (pIn->bpp == 128)) ? AddrSwSetD : AddrSwSetS;
+
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR;
if (numMipLevels > 1)
{
@@ -3223,7 +3378,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
if (displayResource)
{
- swType = pIn->flags.rotated ? ADDR_SW_R : ADDR_SW_D;
+ addrPreferredSwSet.value = pIn->flags.rotated ? AddrSwSetR : AddrSwSetD;
if (pIn->bpp > 64)
{
@@ -3238,17 +3393,21 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
// DCE12 does not support display surface to be _T swizzle mode
prtXor = FALSE;
+
+ addrValidSwSet.value = AddrSwSetD | AddrSwSetR;
}
else if (m_settings.isDcn1)
{
// _R is not supported by Dcn1
if (pIn->bpp == 64)
{
- swType = ADDR_SW_D;
+ addrPreferredSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetD;
}
else
{
- swType = ADDR_SW_S;
+ addrPreferredSwSet.value = AddrSwSetS;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
}
blockSet.micro = FALSE;
@@ -3262,279 +3421,325 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
}
}
- if ((numFrags > 1) &&
- (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
- {
- // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
- blockSet.macro4KB = FALSE;
- }
+ ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value);
- if (pIn->flags.prt)
- {
- blockSet.value &= AddrBlockSetMacro64KB;
- }
+ pOut->clientPreferredSwSet = clientPreferredSwSet;
+
+ // Clamp client preferred set to valid set
+ clientPreferredSwSet.value &= addrValidSwSet.value;
- // Apply customized forbidden setting
- blockSet.value &= ~pIn->forbiddenBlock.value;
+ pOut->validSwTypeSet = addrValidSwSet;
- if (pIn->maxAlign > 0)
+ if (clientPreferredSwSet.value == 0)
{
- if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
+ // Client asks for an invalid swizzle type...
+ ADDR_ASSERT_ALWAYS();
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else
+ {
+ if (IsPow2(clientPreferredSwSet.value))
+ {
+ // Only one swizzle type left, use it directly
+ addrPreferredSwSet.value = clientPreferredSwSet.value;
+ }
+ else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0)
{
- blockSet.macro64KB = FALSE;
+ // Client wants two or more valid swizzle types, but none of them is addrlib preferred
+ if (clientPreferredSwSet.sw_D)
+ {
+ addrPreferredSwSet.value = AddrSwSetD;
+ }
+ else if (clientPreferredSwSet.sw_Z)
+ {
+ addrPreferredSwSet.value = AddrSwSetZ;
+ }
+ else if (clientPreferredSwSet.sw_R)
+ {
+ addrPreferredSwSet.value = AddrSwSetR;
+ }
+ else
+ {
+ ADDR_ASSERT(clientPreferredSwSet.sw_S);
+ addrPreferredSwSet.value = AddrSwSetS;
+ }
}
- if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
+ if ((numFrags > 1) &&
+ (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
{
+ // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
blockSet.macro4KB = FALSE;
}
- if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
+ if (pIn->flags.prt)
{
- blockSet.micro = FALSE;
+ blockSet.value &= AddrBlockSetMacro64KB;
}
- }
- Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
- Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
- UINT_64 padSize[AddrBlockMaxTiledType] = {0};
+ // Apply customized forbidden setting
+ blockSet.value &= ~pIn->forbiddenBlock.value;
- if (blockSet.micro)
- {
- returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
- &blkAlign[AddrBlockMicro].h,
- &blkAlign[AddrBlockMicro].d,
- bpp,
- numFrags,
- pOut->resourceType,
- ADDR_SW_256B);
-
- if (returnCode == ADDR_OK)
+ if (pIn->maxAlign > 0)
{
- if (displayResource)
+ if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
{
- blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
+ blockSet.macro64KB = FALSE;
}
- else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
- (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
+
+ if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
{
- // If one 256B block can contain the surface, don't bother bigger block type
blockSet.macro4KB = FALSE;
- blockSet.macro64KB = FALSE;
- blockSet.var = FALSE;
}
- padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
- slice, &paddedDim[AddrBlockMicro]);
- }
- }
-
- if ((returnCode == ADDR_OK) && blockSet.macro4KB)
- {
- returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
- &blkAlign[AddrBlock4KB].h,
- &blkAlign[AddrBlock4KB].d,
- bpp,
- numFrags,
- pOut->resourceType,
- ADDR_SW_4KB);
-
- if (returnCode == ADDR_OK)
- {
- if (displayResource)
+ if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
{
- blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
+ blockSet.micro = FALSE;
}
-
- padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
- slice, &paddedDim[AddrBlock4KB]);
-
- ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
}
- }
- if ((returnCode == ADDR_OK) && blockSet.macro64KB)
- {
- returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
- &blkAlign[AddrBlock64KB].h,
- &blkAlign[AddrBlock64KB].d,
- bpp,
- numFrags,
- pOut->resourceType,
- ADDR_SW_64KB);
+ Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
+ Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
+ UINT_64 padSize[AddrBlockMaxTiledType] = {0};
- if (returnCode == ADDR_OK)
+ if (blockSet.micro)
{
- if (displayResource)
+ returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
+ &blkAlign[AddrBlockMicro].h,
+ &blkAlign[AddrBlockMicro].d,
+ bpp,
+ numFrags,
+ pOut->resourceType,
+ ADDR_SW_256B);
+
+ if (returnCode == ADDR_OK)
{
- blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
- }
-
- padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
- slice, &paddedDim[AddrBlock64KB]);
-
- ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
- ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
- }
- }
+ if (displayResource)
+ {
+ blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
+ }
+ else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
+ (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
+ {
+ // If one 256B block can contain the surface, don't bother bigger block type
+ blockSet.macro4KB = FALSE;
+ blockSet.macro64KB = FALSE;
+ blockSet.var = FALSE;
+ }
- if (returnCode == ADDR_OK)
- {
- for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
- {
- padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
+ padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
+ slice, &paddedDim[AddrBlockMicro]);
+ }
}
- // Use minimum block type which meets all conditions above if flag minimizeAlign was set
- if (pIn->flags.minimizeAlign)
+ if ((returnCode == ADDR_OK) && blockSet.macro4KB)
{
- // If padded size of 64KB block is larger than padded size of 256B block or 4KB
- // block, filter out 64KB block from candidate list
- if (blockSet.macro64KB &&
- ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
- (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
+ returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
+ &blkAlign[AddrBlock4KB].h,
+ &blkAlign[AddrBlock4KB].d,
+ bpp,
+ numFrags,
+ pOut->resourceType,
+ ADDR_SW_4KB);
+
+ if (returnCode == ADDR_OK)
{
- blockSet.macro64KB = FALSE;
- }
+ if (displayResource)
+ {
+ blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
+ }
- // If padded size of 4KB block is larger than padded size of 256B block,
- // filter out 4KB block from candidate list
- if (blockSet.macro4KB &&
- blockSet.micro &&
- (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
- {
- blockSet.macro4KB = FALSE;
+ padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
+ slice, &paddedDim[AddrBlock4KB]);
+
+ ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
}
}
- // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
- else if (pIn->flags.opt4space)
- {
- UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
- (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
- threshold += threshold >> 1;
-
- if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
+ if ((returnCode == ADDR_OK) && blockSet.macro64KB)
+ {
+ returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
+ &blkAlign[AddrBlock64KB].h,
+ &blkAlign[AddrBlock64KB].d,
+ bpp,
+ numFrags,
+ pOut->resourceType,
+ ADDR_SW_64KB);
+
+ if (returnCode == ADDR_OK)
{
- blockSet.macro64KB = FALSE;
- }
+ if (displayResource)
+ {
+ blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
+ }
- if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
- {
- blockSet.macro4KB = FALSE;
+ padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
+ slice, &paddedDim[AddrBlock64KB]);
+
+ ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
+ ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
}
}
- else
+
+ if (returnCode == ADDR_OK)
{
- if (blockSet.macro64KB &&
- (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
- ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
+ UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
+
+ for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
{
- // If 64KB block waste more than half memory on padding, filter it out from
- // candidate list when it is not the only choice left
- blockSet.macro64KB = FALSE;
+ padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
}
- }
- if (blockSet.value == 0)
- {
- // Bad things happen, client will not get any useful information from AddrLib.
- // Maybe we should fill in some output earlier instead of outputing nothing?
- ADDR_ASSERT_ALWAYS();
- returnCode = ADDR_INVALIDPARAMS;
- }
- else
- {
- pOut->validBlockSet = blockSet;
- pOut->canXor = pOut->canXor &&
- (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
-
- if (blockSet.macro64KB || blockSet.macro4KB)
+ // Use minimum block type which meets all conditions above if flag minimizeAlign was set
+ if (pIn->flags.minimizeAlign)
{
- if (swType == ADDR_SW_Z)
- {
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
- }
- else if (swType == ADDR_SW_S)
+ // If padded size of 64KB block is larger than padded size of 256B block or 4KB
+ // block, filter out 64KB block from candidate list
+ if (blockSet.macro64KB &&
+ ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
+ (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
{
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
+ blockSet.macro64KB = FALSE;
}
- else if (swType == ADDR_SW_D)
+
+ // If padded size of 4KB block is larger than padded size of 256B block,
+ // filter out 4KB block from candidate list
+ if (blockSet.macro4KB &&
+ blockSet.micro &&
+ (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
{
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
+ blockSet.macro4KB = FALSE;
}
- else
+ }
+ // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
+ else if (pIn->flags.opt4space)
+ {
+ UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
+ (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
+
+ threshold += threshold >> 1;
+
+ if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
{
- ADDR_ASSERT(swType == ADDR_SW_R);
- pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
+ blockSet.macro64KB = FALSE;
}
- if (prtXor && blockSet.macro64KB)
+ if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
{
- // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
- const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
- pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
+ blockSet.macro4KB = FALSE;
}
- else if (pOut->canXor)
+ }
+ else
+ {
+ if (blockSet.macro64KB &&
+ (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
+ ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
{
- // Client wants XOR and this is allowed, return XOR version swizzle mode
- const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
- pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
+ // If 64KB block wastes more than half memory on padding, filter it out from
+ // candidate list when it is not the only choice left
+ blockSet.macro64KB = FALSE;
}
}
- else if (blockSet.micro)
+
+ if (blockSet.value == 0)
+ {
+ // Bad things happen, client will not get any useful information from AddrLib.
+ // Maybe we should fill in some output earlier instead of outputting nothing?
+ ADDR_ASSERT_ALWAYS();
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else
{
- if (swType == ADDR_SW_S)
+ pOut->validBlockSet = blockSet;
+ pOut->canXor = pOut->canXor &&
+ (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
+
+ if (blockSet.macro64KB || blockSet.macro4KB)
+ {
+ if (addrPreferredSwSet.value == AddrSwSetZ)
+ {
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
+ }
+ else if (addrPreferredSwSet.value == AddrSwSetS)
+ {
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
+ }
+ else if (addrPreferredSwSet.value == AddrSwSetD)
+ {
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
+ }
+ else
+ {
+ ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
+ pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
+ }
+
+ if (prtXor && blockSet.macro64KB)
+ {
+ // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
+ const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
+ pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
+ }
+ else if (pOut->canXor)
+ {
+ // Client wants XOR and this is allowed, return XOR version swizzle mode
+ const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
+ pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
+ }
+ }
+ else if (blockSet.micro)
{
- pOut->swizzleMode = ADDR_SW_256B_S;
+ if (addrPreferredSwSet.value == AddrSwSetS)
+ {
+ pOut->swizzleMode = ADDR_SW_256B_S;
+ }
+ else if (addrPreferredSwSet.value == AddrSwSetD)
+ {
+ pOut->swizzleMode = ADDR_SW_256B_D;
+ }
+ else
+ {
+ ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
+ pOut->swizzleMode = ADDR_SW_256B_R;
+ }
}
- else if (swType == ADDR_SW_D)
+ else if (blockSet.linear)
{
- pOut->swizzleMode = ADDR_SW_256B_D;
+ // Falling into this branch doesn't mean linear is suitable, only that there are no other choices!
+ pOut->swizzleMode = ADDR_SW_LINEAR;
}
else
{
- ADDR_ASSERT(swType == ADDR_SW_R);
- pOut->swizzleMode = ADDR_SW_256B_R;
- }
- }
- else if (blockSet.linear)
- {
- // Fall into this branch doesn't mean linear is suitable, only no other choices!
- pOut->swizzleMode = ADDR_SW_LINEAR;
- }
- else
- {
- ADDR_ASSERT(blockSet.var);
+ ADDR_ASSERT(blockSet.var);
- // Designer consider VAR swizzle mode is usless for most cases
- ADDR_UNHANDLED_CASE();
+ // Designers consider VAR swizzle mode useless for most cases
+ ADDR_UNHANDLED_CASE();
- returnCode = ADDR_NOTSUPPORTED;
- }
+ returnCode = ADDR_NOTSUPPORTED;
+ }
#if DEBUG
- // Post sanity check, at least AddrLib should accept the output generated by its own
- if (pOut->swizzleMode != ADDR_SW_LINEAR)
- {
- ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
- localIn.flags = pIn->flags;
- localIn.swizzleMode = pOut->swizzleMode;
- localIn.resourceType = pOut->resourceType;
- localIn.format = pIn->format;
- localIn.bpp = bpp;
- localIn.width = width;
- localIn.height = height;
- localIn.numSlices = slice;
- localIn.numMipLevels = numMipLevels;
- localIn.numSamples = numSamples;
- localIn.numFrags = numFrags;
-
- HwlComputeSurfaceInfoSanityCheck(&localIn);
-
- // TODO : check all valid block type available in validBlockSet?
- }
+ // Post sanity check, at least AddrLib should accept the output generated by its own
+ if (pOut->swizzleMode != ADDR_SW_LINEAR)
+ {
+ ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
+ localIn.flags = pIn->flags;
+ localIn.swizzleMode = pOut->swizzleMode;
+ localIn.resourceType = pOut->resourceType;
+ localIn.format = pIn->format;
+ localIn.bpp = bpp;
+ localIn.width = width;
+ localIn.height = height;
+ localIn.numSlices = slice;
+ localIn.numMipLevels = numMipLevels;
+ localIn.numSamples = numSamples;
+ localIn.numFrags = numFrags;
+
+ HwlComputeSurfaceInfoSanityCheck(&localIn);
+
+ }
#endif
+ }
}
}
}
@@ -3709,53 +3914,48 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
- pOut->epitchIsHeight = FALSE;
- pOut->mipChainInTail = FALSE;
+ pOut->epitchIsHeight = FALSE;
+ pOut->mipChainInTail = FALSE;
+ pOut->firstMipIdInTail = pIn->numMipLevels;
- pOut->mipChainPitch = pOut->pitch;
- pOut->mipChainHeight = pOut->height;
- pOut->mipChainSlice = pOut->numSlices;
+ pOut->mipChainPitch = pOut->pitch;
+ pOut->mipChainHeight = pOut->height;
+ pOut->mipChainSlice = pOut->numSlices;
if (pIn->numMipLevels > 1)
{
- UINT_32 numMipLevel;
- ADDR2_MIP_INFO *pMipInfo;
- ADDR2_MIP_INFO mipInfo[4];
-
- if (pOut->pMipInfo != NULL)
+ pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
+ pIn->swizzleMode,
+ pIn->bpp,
+ pIn->width,
+ pIn->height,
+ pIn->numSlices,
+ pOut->blockWidth,
+ pOut->blockHeight,
+ pOut->blockSlices,
+ pIn->numMipLevels,
+ pOut->pMipInfo);
+
+ const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
+
+ if (endingMipId == 0)
{
- pMipInfo = pOut->pMipInfo;
- numMipLevel = pIn->numMipLevels;
- }
- else
- {
- pMipInfo = mipInfo;
- numMipLevel = Min(pIn->numMipLevels, 4u);
- }
+ const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
+ pIn->swizzleMode,
+ pOut->blockWidth,
+ pOut->blockHeight,
+ pOut->blockSlices);
- UINT_32 endingMip = GetMipChainInfo(pIn->resourceType,
- pIn->swizzleMode,
- pIn->bpp,
- pIn->width,
- pIn->height,
- pIn->numSlices,
- pOut->blockWidth,
- pOut->blockHeight,
- pOut->blockSlices,
- numMipLevel,
- pMipInfo);
-
- if (endingMip == 0)
- {
pOut->epitchIsHeight = TRUE;
- pOut->pitch = pMipInfo[0].pitch;
- pOut->height = pMipInfo[0].height;
- pOut->numSlices = pMipInfo[0].depth;
+ pOut->pitch = tailMaxDim.w;
+ pOut->height = tailMaxDim.h;
+ pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
+ tailMaxDim.d : pIn->numSlices;
pOut->mipChainInTail = TRUE;
}
else
{
- UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
+ UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
@@ -3767,7 +3967,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
{
UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
- if ((mip1WidthInBlk == 1) && (endingMip > 2))
+ if ((mip1WidthInBlk == 1) && (endingMipId > 2))
{
mip1WidthInBlk++;
}
@@ -3780,7 +3980,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
{
UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
- if ((mip1HeightInBlk == 1) && (endingMip > 2))
+ if ((mip1HeightInBlk == 1) && (endingMipId > 2))
{
mip1HeightInBlk++;
}
@@ -3821,22 +4021,22 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
UINT_64 macroBlockOffset =
blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
- pMipInfo[i].macroBlockOffset = macroBlockOffset;
- pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
+ pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
+ pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
}
}
}
else if (pOut->pMipInfo != NULL)
{
- pOut->pMipInfo[0].pitch = pOut->pitch;
+ pOut->pMipInfo[0].pitch = pOut->pitch;
pOut->pMipInfo[0].height = pOut->height;
- pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
+ pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
pOut->pMipInfo[0].offset = 0;
}
pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
(pIn->bpp >> 3) * pIn->numFrags;
- pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
+ pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
if (pIn->flags.prt)
@@ -3851,6 +4051,95 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
/**
************************************************************************************************************************
+* Gfx9Lib::HwlComputeSurfaceInfoLinear
+*
+* @brief
+* Internal function to calculate alignment for linear surface
+*
+* @return
+* ADDR_E_RETURNCODE
+************************************************************************************************************************
+*/
+ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
+ ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure; pMipInfo is filled per mip when it is non-NULL
+ ) const
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+ UINT_32 pitch = 0; // a value of 0 is treated as invalid by the check after the branches
+ UINT_32 actualHeight = 0; // a value of 0 is treated as invalid by the check after the branches
+ UINT_32 elementBytes = pIn->bpp >> 3; // bpp / 8; evaluates to 0 when bpp < 8
+ const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256; // PrtAlignment when the prt flag is set, 256 otherwise
+
+ if (IsTex1d(pIn->resourceType))
+ {
+ if (pIn->height > 1) // a 1D resource with height above 1 is rejected
+ {
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ else
+ {
+ const UINT_32 pitchAlignInElement = alignment / elementBytes; // NOTE(review): divides by zero if bpp < 8 - confirm callers reject such bpp
+
+ pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
+ actualHeight = pIn->numMipLevels; // each mip level takes one row of the chain (see the offsets written below)
+
+ if (pIn->flags.prt == FALSE) // ApplyCustomizedPitchHeight is skipped when the prt flag is set
+ {
+ returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
+ &pitch, &actualHeight);
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ if (pOut->pMipInfo != NULL) // per-mip output is optional
+ {
+ for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
+ {
+ pOut->pMipInfo[i].offset = pitch * elementBytes * i; // byte offset of row i in the chain
+ pOut->pMipInfo[i].pitch = pitch;
+ pOut->pMipInfo[i].height = 1;
+ pOut->pMipInfo[i].depth = 1;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo); // every resource type other than 1D
+ }
+
+ if ((pitch == 0) || (actualHeight == 0)) // replaces whatever return code was set above
+ {
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ pOut->pitch = pitch;
+ pOut->height = pIn->height;
+ pOut->numSlices = pIn->numSlices;
+ pOut->mipChainPitch = pitch;
+ pOut->mipChainHeight = actualHeight;
+ pOut->mipChainSlice = pOut->numSlices;
+ pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
+ pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes; // computed from the mip chain height, not pIn->height
+ pOut->surfSize = pOut->sliceSize * pOut->numSlices;
+ pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment; // LINEAR_GENERAL uses bpp / 8 instead of the 256 / PrtAlignment value
+ pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes); // 256 bytes expressed in elements
+ pOut->blockHeight = 1;
+ pOut->blockSlices = 1;
+ }
+
+ // Post calculation validate. NOTE(review): also evaluated when returnCode != ADDR_OK, in which case this function did not write pOut->sliceSize
+ ADDR_ASSERT(pOut->sliceSize > 0);
+
+ return returnCode;
+}
+
+/**
+************************************************************************************************************************
* Gfx9Lib::GetMipChainInfo
*
* @brief
@@ -3876,16 +4165,15 @@ UINT_32 Gfx9Lib::GetMipChainInfo(
const Dim3d tailMaxDim =
GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
- UINT_32 mipPitch = mip0Width;
- UINT_32 mipHeight = mip0Height;
- UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
- UINT_32 offset = 0;
- UINT_32 endingMip = numMipLevel - 1;
- BOOL_32 inTail = FALSE;
- BOOL_32 finalDim = FALSE;
-
- BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
- BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
+ UINT_32 mipPitch = mip0Width;
+ UINT_32 mipHeight = mip0Height;
+ UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
+ UINT_32 offset = 0;
+ UINT_32 firstMipIdInTail = numMipLevel;
+ BOOL_32 inTail = FALSE;
+ BOOL_32 finalDim = FALSE;
+ BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
+ BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
{
@@ -3931,10 +4219,9 @@ UINT_32 Gfx9Lib::GetMipChainInfo(
if (inTail)
{
- endingMip = mipId;
-
- mipPitch = tailMaxDim.w;
- mipHeight = tailMaxDim.h;
+ firstMipIdInTail = mipId;
+ mipPitch = tailMaxDim.w;
+ mipHeight = tailMaxDim.h;
if (is3dThick)
{
@@ -3953,10 +4240,14 @@ UINT_32 Gfx9Lib::GetMipChainInfo(
}
}
- pMipInfo[mipId].pitch = mipPitch;
- pMipInfo[mipId].height = mipHeight;
- pMipInfo[mipId].depth = mipDepth;
- pMipInfo[mipId].offset = offset;
+ if (pMipInfo != NULL)
+ {
+ pMipInfo[mipId].pitch = mipPitch;
+ pMipInfo[mipId].height = mipHeight;
+ pMipInfo[mipId].depth = mipDepth;
+ pMipInfo[mipId].offset = offset;
+ }
+
offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
if (finalDim)
@@ -3978,7 +4269,7 @@ UINT_32 Gfx9Lib::GetMipChainInfo(
}
}
- return endingMip;
+ return firstMipIdInTail;
}
/**
@@ -3999,7 +4290,7 @@ VOID Gfx9Lib::GetMetaMiptailInfo(
Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
) const
{
- BOOL_32 isThick = (pMetaBlkDim->d > 1);
+ BOOL_32 isThick = (pMetaBlkDim->d > 1);
UINT_32 mipWidth = pMetaBlkDim->w;
UINT_32 mipHeight = pMetaBlkDim->h >> 1;
UINT_32 mipDepth = pMetaBlkDim->d;
@@ -4557,5 +4848,72 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
return returnCode;
}
+/**
+************************************************************************************************************************
+* Gfx9Lib::ComputeSurfaceLinearPadding
+*
+* @brief
+* Internal function to calculate padding for linear swizzle 2D/3D surface
+*
+* @return
+* ADDR_E_RETURNCODE; the two [out] values are only written when ADDR_OK is returned
+************************************************************************************************************************
+*/
+ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
+ UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
+ UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
+ ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information, may be NULL
+ ) const
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ UINT_32 elementBytes = pIn->bpp >> 3; // bpp / 8; evaluates to 0 when bpp < 8
+ UINT_32 pitchAlignInElement = 0;
+
+ if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
+ {
+ ADDR_ASSERT(pIn->numMipLevels <= 1);
+ ADDR_ASSERT(pIn->numSlices <= 1);
+ pitchAlignInElement = 1; // pitch is aligned to a single element
+ }
+ else
+ {
+ pitchAlignInElement = (256 / elementBytes); // 256 bytes expressed in elements; NOTE(review): divides by zero if bpp < 8 - confirm callers reject such bpp
+ }
+
+ UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
+ UINT_32 slice0PaddedHeight = pIn->height;
+
+ returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
+ &mipChainWidth, &slice0PaddedHeight);
+
+ if (returnCode == ADDR_OK)
+ {
+ UINT_32 mipChainHeight = 0; // rows consumed by the mips visited so far
+ UINT_32 mipHeight = pIn->height;
+
+ for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
+ {
+ if (pMipInfo != NULL) // per-mip output is optional
+ {
+ pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes; // byte offset of the rows preceding this mip, all at the padded width
+ pMipInfo[i].pitch = mipChainWidth;
+ pMipInfo[i].height = mipHeight;
+ pMipInfo[i].depth = 1;
+ }
+
+ mipChainHeight += mipHeight;
+ mipHeight = RoundHalf(mipHeight);
+ mipHeight = Max(mipHeight, 1u); // next mip height never drops below 1
+ }
+
+ *pMipmap0PaddedWidth = mipChainWidth;
+ *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight; // accumulated chain height only when there is more than one level
+ }
+
+ return returnCode;
+}
+
} // V2
} // Addr
diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.h b/src/amd/addrlib/gfx9/gfx9addrlib.h
index 418ccac5142..1f233a4ff91 100644
--- a/src/amd/addrlib/gfx9/gfx9addrlib.h
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.h
@@ -65,7 +65,9 @@ struct Gfx9ChipSettings
// Misc configuration bits
UINT_32 metaBaseAlignFix : 1;
UINT_32 depthPipeXorDisable : 1;
- UINT_32 reserved2 : 30;
+ UINT_32 htileAlignFix : 1;
+ UINT_32 applyAliasFix : 1;
+ UINT_32 reserved2 : 28;
};
};
@@ -83,6 +85,28 @@ enum Gfx9DataType
/**
************************************************************************************************************************
+* @brief GFX9 meta equation parameters
+************************************************************************************************************************
+*/
+struct MetaEqParams // NOTE(review): presumably the set of inputs that identifies one meta equation (cf. MaxCachedMetaEq) - confirm against the users of this struct
+{
+ UINT_32 maxMip;
+ UINT_32 elementBytesLog2; // NOTE(review): *Log2 fields presumably hold log2 of the named quantity - confirm
+ UINT_32 numSamplesLog2;
+ ADDR2_META_FLAGS metaFlag;
+ Gfx9DataType dataSurfaceType;
+ AddrSwizzleMode swizzleMode;
+ AddrResourceType resourceType;
+ UINT_32 metaBlkWidthLog2;
+ UINT_32 metaBlkHeightLog2;
+ UINT_32 metaBlkDepthLog2;
+ UINT_32 compBlkWidthLog2;
+ UINT_32 compBlkHeightLog2;
+ UINT_32 compBlkDepthLog2;
+};
+
+/**
+************************************************************************************************************************
* @brief This class is the GFX9 specific address library
* function set.
************************************************************************************************************************
@@ -139,31 +163,31 @@ protected:
virtual ADDR_E_RETURNCODE HwlComputeHtileInfo(
const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo(
const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,
- ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
- ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const;
+ ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
- const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
- const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr(
- const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
- ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
+ ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord(
- const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const;
+ const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut);
virtual UINT_32 HwlGetEquationIndex(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
@@ -206,17 +230,7 @@ protected:
if (IsXor(swizzleMode))
{
- if (m_settings.isVega10 || m_settings.isRaven)
- {
- baseAlign = GetBlockSize(swizzleMode);
- }
- else
- {
- UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
- UINT_32 pipeBits = GetPipeXorBits(blockSizeLog2);
- UINT_32 bankBits = GetBankXorBits(blockSizeLog2);
- baseAlign = 1 << (Min(blockSizeLog2, m_pipeInterleaveLog2 + pipeBits+ bankBits));
- }
+ baseAlign = GetBlockSize(swizzleMode);
}
else
{
@@ -249,6 +263,10 @@ protected:
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
@@ -352,6 +370,10 @@ protected:
return compressBlkDim;
}
+
+ static const UINT_32 MaxSeLog2 = 3;
+ static const UINT_32 MaxRbPerSeLog2 = 2;
+
static const Dim3d Block256_3dS[MaxNumOfBpp];
static const Dim3d Block256_3dZ[MaxNumOfBpp];
@@ -375,6 +397,8 @@ protected:
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2];
+ static const UINT_32 MaxCachedMetaEq = 2;
+
private:
virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(
ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const;
@@ -382,7 +406,7 @@ private:
virtual BOOL_32 HwlInitGlobalParams(
const ADDR_CREATE_INPUT* pCreateIn);
- static VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2);
+ VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const;
VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
@@ -393,7 +417,7 @@ private:
UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const;
- VOID GetMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip,
+ VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2,
ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
@@ -401,6 +425,8 @@ private:
UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2,
UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const;
+ const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams);
+
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim,
@@ -408,7 +434,17 @@ private:
UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth,
UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const;
+ ADDR_E_RETURNCODE ComputeSurfaceLinearPadding(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ UINT_32* pMipmap0PaddedWidth,
+ UINT_32* pSlice0PaddedHeight,
+ ADDR2_MIP_INFO* pMipInfo = NULL) const;
+
Gfx9ChipSettings m_settings;
+
+ CoordEq m_cachedMetaEq[MaxCachedMetaEq];
+ MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq];
+ UINT_32 m_metaEqOverrideIndex;
};
} // V2
diff --git a/src/amd/addrlib/gfx9/rbmap.cpp b/src/amd/addrlib/gfx9/rbmap.cpp
deleted file mode 100644
index 789140d7c0e..00000000000
--- a/src/amd/addrlib/gfx9/rbmap.cpp
+++ /dev/null
@@ -1,1388 +0,0 @@
-/*
- * Copyright © 2017 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
- * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- */
-
-// This class generates rb id map based rb id equations
-
-//#define DPI_DEBUG 1
-// Unlock more verbose debug messages (V* borrows from dj -v * to indicate most verbosity)
-//#define DPI_DEBUG_V4 1
-//#define DPI_DEBUG_V5 1
-//#define DPI_DEBUG_PIPE_CASES 1
-// "----+----|----+----|----+----|----+----|"
-#include "addrcommon.h"
-#include "rbmap.h"
-
-RB_MAP::RB_MAP(void)
-{
- Initialize();
-}
-
-VOID RB_MAP::Get_Comp_Block_Screen_Space( CoordEq& addr, int bytes_log2, int* w, int* h, int* d)
-{
- int n, i;
- if( w ) *w = 0;
- if( h ) *h = 0;
- if( d ) *d = 0;
- for( n=0; n<bytes_log2; n++ ) { // go up to the bytes_log2 bit
- for( i=0; (unsigned)i<addr[n].getsize(); i++ ) {
- char dim = addr[n][i].getdim();
- int ord = addr[n][i].getord();
- if( w && dim == 'x' && ord >= *w ) *w = ord+1;
- if( h && dim == 'y' && ord >= *h ) *h = ord+1;
- if( d && dim == 'z' && ord >= *d ) *d = ord+1;
- }
- }
-}
-
-void
-RB_MAP::Get_Meta_Block_Screen_Space( int num_comp_blocks_log2, bool is_thick, bool y_biased,
- int comp_block_width_log2, int comp_block_height_log2, int comp_block_depth_log2,
-
- // Outputs
- int& meta_block_width_log2, int& meta_block_height_log2, int& meta_block_depth_log2 )
-{
- meta_block_width_log2 = comp_block_width_log2;
- meta_block_height_log2 = comp_block_height_log2;
- meta_block_depth_log2 = comp_block_depth_log2;
- int n;
-
- for( n=0; n<num_comp_blocks_log2; n++ ) {
- if( (meta_block_height_log2 < meta_block_width_log2) ||
- (y_biased && (meta_block_height_log2 == meta_block_width_log2)) ) {
- if ( !is_thick || (meta_block_height_log2 <= meta_block_depth_log2) )
- meta_block_height_log2++;
- else
- meta_block_depth_log2++;
- }
- else {
- if ( !is_thick || (meta_block_width_log2 <= meta_block_depth_log2) )
- meta_block_width_log2++;
- else
- meta_block_depth_log2++;
- }
- }
-}
-
-void
-RB_MAP::cap_pipe( int xmode, bool is_thick, int& num_ses_log2, int bpp_log2, int num_samples_log2, int pipe_interleave_log2, int& block_size_log2, int& num_pipes_log2 )
-{
- // pipes+SEs can't exceed 32 for now
- if( num_pipes_log2+num_ses_log2 > 5 ) {
- num_pipes_log2 = 5-num_ses_log2;
- }
-
- // Since we are not supporting SE affinity anymore, just add nu_ses to num_pipes, and set num_ses to 0
- num_pipes_log2 += num_ses_log2;
- num_ses_log2 = 0;
-
- // If block size is set to variable (0), compute the size
- if( block_size_log2 == 0 ) {
- //
- //TODO Temporary disable till RTL can drive Var signals properly
- }
-
- if( xmode != NONE ) {
- int max_pipes_log2 = block_size_log2 - pipe_interleave_log2;
- if( is_thick ) {
- // For 3d, treat the num_pipes as the sum of num_pipes and gpus
- num_pipes_log2 = num_pipes_log2 + num_ses_log2;
- num_ses_log2 = 0;
- } else {
- int block_space_used = num_pipes_log2+pipe_interleave_log2;
- if( block_space_used < 10+bpp_log2 ) block_space_used = 10+bpp_log2;
- // if the num gpus exceeds however many bits we have left between block size and block_space_used+num_samples
- // then set num_ses_log2 to 0
- if( num_ses_log2 > block_size_log2 - block_space_used - num_samples_log2) {
- num_pipes_log2 = num_pipes_log2 + num_ses_log2;
- num_ses_log2 = 0;
- }
- }
- if( num_pipes_log2 > max_pipes_log2 ) {
- // If it exceeds the space we have left, cap it to that
- num_pipes_log2 = max_pipes_log2;
- }
- } else {
- num_pipes_log2 = num_pipes_log2 + num_ses_log2;
- num_ses_log2 = 0;
- }
-}
-
-void RB_MAP::Get_Data_Offset_Equation( CoordEq& data_eq, int data_type, int bpp_log2, int num_samples_log2, int block_size_log2 )
-{
- bool is_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
- bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
- bool is_color = ( data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- bool is_s = ( data_type == DATA_COLOR3D_S );
- Coordinate cx( 'x', 0 );
- Coordinate cy( 'y', 0 );
- Coordinate cz( 'z', 0 );
- Coordinate cs( 's', 0 );
- // Clear the equation
- data_eq.resize(0);
- data_eq.resize(27);
- if( block_size_log2 == 0 ) block_size_log2 = 16;
-
- if( is_linear ) {
- Coordinate cm( 'm', 0 );
- int i;
- data_eq.resize(49);
- for( i=0; i<49; i++ ) {
- data_eq[i].add(cm);
- cm++;
- }
- } else if( is_thick ) {
- // Color 3d (_S and _Z modes; _D is same as color 2d)
- int i;
- if( is_s ) {
- // Standard 3d swizzle
- // Fill in bottom x bits
- for( i=bpp_log2; i<4; i++ ) {
- data_eq[i].add(cx);
- cx++;
- }
- // Fill in 2 bits of y and then z
- for( i=4; i<6; i++ ) {
- data_eq[i].add(cy);
- cy++;
- }
- for( i=6; i<8; i++ ) {
- data_eq[i].add(cz);
- cz++;
- }
- if (bpp_log2 < 2) {
- // fill in z & y bit
- data_eq[8].add(cz);
- data_eq[9].add(cy);
- cz++;
- cy++;
- } else if( bpp_log2 == 2 ) {
- // fill in y and x bit
- data_eq[8].add(cy);
- data_eq[9].add(cx);
- cy++;
- cx++;
- } else {
- // fill in 2 x bits
- data_eq[8].add(cx);
- cx++;
- data_eq[9].add(cx);
- cx++;
- }
- } else {
- // Z 3d swizzle
- int m2d_end = (bpp_log2==0) ? 3 : ((bpp_log2 < 4) ? 4 : 5);
- int num_zs = (bpp_log2==0 || bpp_log2==4) ? 2 : ((bpp_log2==1) ? 3 : 1);
- data_eq.mort2d( cx, cy, bpp_log2, m2d_end );
- for( i=m2d_end+1; i<=m2d_end+num_zs; i++ ) {
- data_eq[i].add(cz);
- cz++;
- }
- if( bpp_log2 == 0 || bpp_log2 == 3 ) {
- // add an x and z
- data_eq[6].add(cx);
- data_eq[7].add(cz);
- cx++;
- cz++;
- } else if( bpp_log2 == 2 ) {
- // add a y and z
- data_eq[6].add(cy);
- data_eq[7].add(cz);
- cy++;
- cz++;
- }
- // add y and x
- data_eq[8].add(cy);
- data_eq[9].add(cx);
- cy++;
- cx++;
- }
- // Fill in bit 10 and up
- data_eq.mort3d( cz, cy, cx, 10 );
- } else if( is_color ) {
- // Color 2D
- int micro_y_bits = (8-bpp_log2) / 2;
- int tile_split_start = block_size_log2 - num_samples_log2;
- int i;
- // Fill in bottom x bits
- for( i=bpp_log2;i<4; i++ ) {
- data_eq[i].add(cx);
- cx++;
- }
- // Fill in bottom y bits
- for( i=4; i<4+micro_y_bits; i++ ) {
- data_eq[i].add(cy);
- cy++;
- }
- // Fill in last of the micro_x bits
- for( i=4+micro_y_bits; i<8; i++ ) {
- data_eq[i].add(cx);
- cx++;
- }
- // Fill in x/y bits below sample split
- data_eq.mort2d( cy, cx, 8, tile_split_start-1 );
- // Fill in sample bits
- for( i=0; i<num_samples_log2; i++ ) {
- cs.set( 's', i );
- data_eq[tile_split_start+i].add(cs);
- }
- // Fill in x/y bits above sample split
- if( (num_samples_log2 & 1) ^ (block_size_log2 & 1) ) data_eq.mort2d( cx, cy, block_size_log2 );
- else data_eq.mort2d( cy, cx, block_size_log2 );
- } else {
- // Z, stencil or fmask
- // First, figure out where each section of bits starts
- int sample_start = bpp_log2;
- int pixel_start = bpp_log2 + num_samples_log2;
- int y_maj_start = 6 + num_samples_log2;
-
- // Put in sample bits
- int s;
- for( s=0; s<num_samples_log2; s++ ) {
- cs.set( 's', s );
- data_eq[sample_start+s].add(cs);
- }
- // Put in the x-major order pixel bits
- data_eq.mort2d( cx, cy, pixel_start, y_maj_start-1 );
- // Put in the y-major order pixel bits
- data_eq.mort2d( cy, cx, y_maj_start );
- }
-}
-
-void RB_MAP::Get_RB_Equation( CoordEq& rb_equation, int num_ses_log2, int num_rbs_log2 )
-{
- // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
- int rb_region = (num_rbs_log2 == 0) ? 5 : 4;
- Coordinate cx( 'x', rb_region );
- Coordinate cy( 'y', rb_region );
- int i, start = 0, num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;
- // Clear the rb equation
- rb_equation.resize(0);
- rb_equation.resize(num_total_rbs_log2);
- if( num_ses_log2 > 0 && num_rbs_log2 == 1 ) {
- // Special case when more than 1 SE, and only 1 RB per SE
- rb_equation[0].add(cx);
- rb_equation[0].add(cy);
- cx++;
- cy++;
- rb_equation[0].add(cy);
- start++;
- }
- for( i=0; i<2*(num_total_rbs_log2-start); i++ ) {
- int index = start + (((start+i)>=num_total_rbs_log2) ? 2*(num_total_rbs_log2-start)-i-1 : i);
- Coordinate& c = ((i % 2) == 1) ? cx : cy;
- rb_equation[index].add(c);
- c++;
- }
-}
-
-//void getcheq( CoordEq& pipe_equation, CoordEq& addr, int pipe_interleave_log2, int num_pipes_log2,
-void
-RB_MAP::Get_Pipe_Equation( CoordEq& pipe_equation, CoordEq& addr,
- int pipe_interleave_log2,
- int num_pipes_log2,
-
- int block_size_log2,
- int num_samples_log2,
-
- int xmode, int data_type
- )
-{
- int pipe;
- CoordEq addr_f, xormask, xormask2;
- Coordinate tile_min( 'x', 3 );
-
- bool is_color = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR2D_LINEAR || data_type == DATA_COLOR3D_D_NOT_USED );
- bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
-
- // For color, filter out sample bits only
- // otherwise filter out everything under an 8x8 tile
- if( is_color )
- tile_min.set( 'x', 0 );
-
- addr.copy( addr_f );
-
- // Z/stencil is no longer tile split
- if( is_color )
- addr_f.shift( -num_samples_log2, block_size_log2- num_samples_log2 );
-
- int i;
- addr_f.copy( pipe_equation, pipe_interleave_log2, num_pipes_log2 ); //@todo kr needs num_ses_log2??
-
-
- // This section should only apply to z/stencil, maybe fmask
- // If the pipe bit is below the comp block size, then keep moving up the address until we find a bit that is above
- for( pipe=0; addr_f[pipe_interleave_log2 + pipe][0] < tile_min; pipe++ ) {
- }
-
- // if pipe is 0, then the first pipe bit is above the comp block size, so we don't need to do anything
- // Note, this if condition is not necessary, since if we execute the loop when pipe==0, we will get the same pipe equation
- if ( pipe != 0 ) {
- int j = pipe;
-
-
- for( i=0; i<num_pipes_log2; i++ ) {
- // Copy the jth bit above pipe interleave to the current pipe equation bit
- addr_f[pipe_interleave_log2 + j].copyto(pipe_equation[i]);
- j++;
-
-
- }
-
-
- }
-
- if( xmode == PRT ) {
- // Clear out bits above the block size if prt's are enabled
- addr_f.resize(block_size_log2);
- addr_f.resize(48);
- }
-
- if( xmode != NONE ) {
- if( is_thick ) {
- addr_f.copy( xormask2, pipe_interleave_log2+num_pipes_log2, 2*num_pipes_log2 );
-
- xormask.resize( num_pipes_log2 );
- for( pipe=0; pipe<num_pipes_log2; pipe++ ) {
- xormask[pipe].add( xormask2[2*pipe] );
- xormask[pipe].add( xormask2[2*pipe+1] );
- }
- } else {
- Coordinate co;
- // Xor in the bits above the pipe+gpu bits
- addr_f.copy( xormask, pipe_interleave_log2 + pipe + num_pipes_log2, num_pipes_log2 );
- if( num_samples_log2 == 0 && (xmode != PRT) ) {
- // if 1xaa and not prt, then xor in the z bits
- xormask2.resize(0);
- xormask2.resize(num_pipes_log2);
- for( pipe=0; pipe<num_pipes_log2; pipe++ ) {
- co.set( 'z', num_pipes_log2-1 - pipe );
- xormask2[pipe].add( co );
- }
-
- pipe_equation.xorin( xormask2 );
- }
- }
-
- xormask.reverse();
- pipe_equation.xorin( xormask );
-
- }
-}
-
-void RB_MAP::get_meta_miptail_coord( int& x, int& y, int& z, int mip_in_tail, int blk_width_log2, int blk_height_log2, int blk_depth_log2 )
-{
- bool is_thick = (blk_depth_log2>0);
- int m;
- int mip_width = 1 << blk_width_log2;
- int mip_height = 1 << (blk_height_log2-1);
- int mip_depth = 1 << blk_depth_log2;
-
- // Find the minimal increment, based on the block size and 2d/3d
- int min_inc;
- if(is_thick) {
- min_inc = (blk_height_log2 >= 9) ? 128 : ((blk_height_log2 == 8) ? 64 : 32);
- } else if(blk_height_log2>=10) {
- min_inc = 256;
- } else if(blk_height_log2==9) {
- min_inc = 128;
- } else {
- min_inc = 64;
- }
-
- for( m=0; m<mip_in_tail; m++ ) {
- if( mip_width <= 32 ) {
- // special case when below 32x32 mipmap
- switch(mip_in_tail-m) {
- case 0: break; // 32x32
- case 1: x+=32; break; // 16x16
- case 2: y+=32; break; // 8x8
- case 3: y+=32; x+=16; break;// 4x4
- case 4: y+=32; x+=32; break;// 2x2
- case 5: y+=32; x+=48; break;// 1x1
- // The following are for BC/ASTC formats
- case 6: y+=48; break; // 1/2 x 1/2
- case 7: y+=48; x+=16; break;// 1/4 x 1/4
- case 8: y+=48; x+=32; break;// 1/8 x 1/8
- default:y+=48; x+=48; break;// 1/16 x 1/16
- }
- m = mip_in_tail; // break the loop
- } else {
- if( mip_width <= min_inc ) {
- // if we're below the minimal increment...
- if( is_thick ) {
- // For 3d, just go in z direction
- z += mip_depth;
- } else {
- // For 2d, first go across, then down
- if( mip_width * 2 == min_inc ) {
- // if we're 2 mips below, that's when we go back in x, and down in y
- x -= min_inc;
- y += min_inc;
- } else {
- // otherwise, just go across in x
- x += min_inc;
- }
- }
- } else {
- // On even mip, go down, otherwise, go across
- if( m&1 ) {
- x += mip_width;
- } else {
- y += mip_height;
- }
- }
- // Divide the width by 2
- mip_width = mip_width / 2;
- // After the first mip in tail, the mip is always a square
- mip_height = mip_width;
- // ...or for 3d, a cube
- if(is_thick) mip_depth = mip_width;
- }
- }
-}
-
-void RB_MAP::get_mip_coord( int& x, int& y, int& z, int mip,
- int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2,
- int data_blk_width_log2, int data_blk_height_log2,
- int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip,
- int data_type, int bpp_log2, bool meta_linear )
-{
- if( meta_linear ) {
- get_mip_coord_linear( x, y, z, mip, data_blk_width_log2, data_blk_height_log2,
- surf_width, surf_height, surf_depth, epitch, max_mip, data_type, bpp_log2 );
- } else {
- get_mip_coord_nonlinear( x, y, z, mip, meta_blk_width_log2, meta_blk_height_log2, meta_blk_depth_log2,
- surf_width, surf_height, surf_depth, epitch, max_mip, data_type );
- }
-}
-
-void RB_MAP::get_mip_coord_linear( int& x, int& y, int& z,
- int mip,
- int data_blk_width_log2, int data_blk_height_log2,
- int& surf_width, int& surf_height, int& surf_depth, int epitch,
- int max_mip, int data_type, int bpp_log2
- )
-{
- bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
-
- if( data_linear ) {
- // linear width is padded out to 256 Bytes
- int width_padding = 8 - bpp_log2;
- int width_pad_mask = ~(0xffffffff << width_padding);
- int padded_surf_width = surf_width;
- int padded_surf_height = (data_type == DATA_COLOR1D) ? 1 : surf_height;
-
- if( max_mip > 0 ) {
- int mip_width = padded_surf_width;
- int mip_height = padded_surf_height;
- int padded_mip_height = 0;
- int mip_base = 0;
- int m = 0;
- while( (mip_width >= 1 || mip_height >= 1) && m <= max_mip ) {
- if( mip == m ) mip_base = padded_mip_height;
- padded_mip_height += mip_height;
- m++;
- mip_width = (mip_width / 2) + (mip_width & 1);
- mip_height = (mip_height / 2) + (mip_height & 1);
- }
- if( mip >= m ) {
- // assert error
- mip_base = padded_mip_height - mip_height;
- }
- padded_surf_height = padded_mip_height;
-
- if(epitch > 0){
- padded_surf_height = epitch;
- }
- y += mip_base;
- padded_surf_width = ((surf_width >> width_padding) + ((surf_width & width_pad_mask) ? 1 : 0)) << width_padding;
- }
- else{
- padded_surf_width = ((surf_width >> width_padding) + ((surf_width & width_pad_mask) ? 1 : 0)) << width_padding;
-
- // Pad up epitch to meta block width
- if( (epitch & width_pad_mask) != 0 ) {
- epitch = ((epitch >> width_padding) + 1) << width_padding;
- }
- // Take max of epitch and computed surf width
- if( epitch < padded_surf_width ) {
- // assert error
- } else {
- padded_surf_width = epitch;
- }
- }
-
- surf_width = padded_surf_width;
- surf_height = padded_surf_height;
- }
- else {
- // padding based data block size
- int width_pad_mask = ~(0xffffffff << data_blk_width_log2);
- int height_pad_mask = ~(0xffffffff << data_blk_height_log2);
-
- // Pad the data surface dimensions by the block dimensions, and put the result in compressed block dimension units
- surf_width = ((surf_width >> data_blk_width_log2) + ((surf_width & width_pad_mask) ? 1 : 0)) << data_blk_width_log2;
- surf_height = ((surf_height >> data_blk_height_log2) + ((surf_height & height_pad_mask) ? 1 : 0)) << data_blk_height_log2;
-
- // Tiled data, linear metadata
- if( max_mip > 0 ) {
- // we don't allow mipmapping on tiled data, with linear metadata
- // assert error
- }
-
- // Pad up epitch to data block width
- if( (epitch & width_pad_mask) != 0 ) {
- epitch = ((epitch >> data_blk_width_log2) + 1) << data_blk_width_log2;
- }
- // Take max of epitch and computed surf width
- if( epitch < surf_width ) {
- // assert error
- } else {
- surf_width = epitch;
- }
- }
-}
-
-void RB_MAP::get_mip_coord_nonlinear( int& x, int& y, int& z,
- int mip,
- int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2,
-
- // Outputs
- int& surf_width, int& surf_height, int& surf_depth,
-
- int epitch, int max_mip, int data_type
- )
-{
- bool is3d = (data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- int order; // 0 = xmajor, 1 = ymajor, 2 = zmajor
-
- int mip_width = surf_width;
- int mip_height = surf_height;
- int mip_depth = (is3d) ? surf_depth : 1;
-
- // Divide surface w/h/d by block size, padding if needed
- surf_width = (((surf_width & ((1<<meta_blk_width_log2 )-1)) != 0) ? 1 : 0) + (surf_width >> meta_blk_width_log2);
- surf_height = (((surf_height & ((1<<meta_blk_height_log2)-1)) != 0) ? 1 : 0) + (surf_height >> meta_blk_height_log2);
- surf_depth = (((surf_depth & ((1<<meta_blk_depth_log2 )-1)) != 0) ? 1 : 0) + (surf_depth >> meta_blk_depth_log2);
- epitch = (((epitch & ((1<<meta_blk_width_log2 )-1)) != 0) ? 1 : 0) + (epitch >> meta_blk_width_log2);
-
- if( max_mip > 0 ) {
- // Determine major order
- if( is3d && surf_depth > surf_width && surf_depth > surf_height ) {
- order = 2; // Z major
- }
- else if( surf_width >= surf_height ) {
- order = 0; // X major
- }
- else {
- order = 1; // Y major
- }
-
- // Check if mip 0 is in the tail
- bool in_tail = (mip_width <= (1<<meta_blk_width_log2)) &&
- (mip_height <= (1<<(meta_blk_height_log2-1))) &&
- (!is3d || (mip_depth <= (1<<meta_blk_depth_log2)));
- // Pad the mip w/h/d, which is just the surf w/h/d times blk dim
- mip_width = surf_width << meta_blk_width_log2;
- mip_height = surf_height << meta_blk_height_log2;
- mip_depth = surf_depth << meta_blk_depth_log2;
-
- if( !in_tail ) {
- // Select the dimension that stores the mip chain, based on major order
- // Then pad it out to max(2, ceil(mip_dim/2))
- int& mip_dim = (order == 1) ? surf_width : surf_height;
- // in y-major, if height > 2 blocks, then we need extra padding;
- // in x or z major, it only occurs if width/depth is greater than 4 blocks
- // Height is special, since we can enter the mip tail when height is 1/2 block high
- int order_dim_limit = (order == 1) ? 2 : 4;
- int& order_dim = (order == 0) ? surf_width : ((order == 1) ? surf_height : surf_depth);
- if( mip_dim < 3 && order_dim > order_dim_limit && max_mip >= 3 ) mip_dim += 2;
- else mip_dim += (mip_dim/2) + (mip_dim&1);
- }
-
- int m;
- for( m=0; m<mip; m++ ) {
- if( in_tail ) {
- get_meta_miptail_coord( x, y, z, mip-m, meta_blk_width_log2, meta_blk_height_log2, meta_blk_depth_log2 );
- m = mip; // break the loop
- } else {
- // Move either x, y, or z by the mip dimension based on which mip we're on and the order
- if(m>=3 || m&1) {
- switch(order) {
- case 0: x += mip_width; break;
- case 1: y += mip_height; break;
- case 2: z += mip_depth; break;
- }
- } else {
- switch(order) {
- case 0: y += mip_height; break;
- case 1: x += mip_width; break;
- case 2: y += mip_height; break;
- }
- }
- // Compute next mip's dimensions
- mip_width = (mip_width/2);
- mip_height = (mip_height/2);
- mip_depth = (mip_depth/2);
- // See if it's in the tail
- in_tail = (mip_width <= (1<<meta_blk_width_log2)) &&
- (mip_height <= (1<<(meta_blk_height_log2-1))) &&
- (!is3d || (mip_depth <= (1<<meta_blk_depth_log2)));
- // Pad out mip dimensions
- mip_width = ((mip_width >> meta_blk_width_log2) + ((mip_width & ((1<<meta_blk_width_log2) -1)) != 0)) << meta_blk_width_log2;
- mip_height = ((mip_height >> meta_blk_height_log2) + ((mip_height & ((1<<meta_blk_height_log2)-1)) != 0)) << meta_blk_height_log2;
- mip_depth = ((mip_depth >> meta_blk_depth_log2) + ((mip_depth & ((1<<meta_blk_depth_log2) -1)) != 0)) << meta_blk_depth_log2;
- }
- }
- } else {
- // Take max of epitch and computed surf width
- surf_width = (surf_width > epitch) ? surf_width : epitch;
- }
-
- // Multiply the surface dimension by block size
- surf_width = surf_width << meta_blk_width_log2;
- surf_height = surf_height << meta_blk_height_log2;
- surf_depth = surf_depth << meta_blk_depth_log2;
-
-}
-
-void
-RB_MAP::get_meta_eq( CoordEq& metaaddr,
- int max_mip, int num_ses_log2, int num_rbs_log2,
- int &num_pipes_log2,
- int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2,
- int pipe_interleave_log2,
- int xmode,
- int data_type,
- int meta_alignment, bool meta_linear)
-{
- // Metaaddressing
- Coordinate co;
- CoordEq cur_rbeq, pipe_equation, orig_pipe_equation;
-
- bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
- bool is_color = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- //bool is3d = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
-
- bool is_fmask = (data_type == DATA_FMASK);
- bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB);
- bool is_rb_aligned = (meta_alignment == META_ALIGN_RB) || (meta_alignment == META_ALIGN_PIPE_RB);
-
- bool is_mipmapped = (max_mip > 0) ? true : false;
-
- int pipe_mask = 0x0;
- int comp_frag_log2 = (is_color && (num_samples_log2 > max_comp_frag_log2)) ? max_comp_frag_log2 : num_samples_log2;
-
- int uncomp_frag_log2 = num_samples_log2 - comp_frag_log2;
-
- // Constraints on linear
- if ( data_linear ) {
- xmode = NONE;
- num_samples_log2 = 0;
- is_rb_aligned = false;
- meta_linear = true;
- }
- if( meta_linear && !data_linear ) {
- is_pipe_aligned = false;
- }
-
- // Min metablock size if thick is 64KB, otherwise 4KB
- int min_meta_block_size_log2 = (is_thick) ? 16 : 12;
-
- // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
- int metadata_word_size_log2 = (is_fmask) ? -1 : ((is_color) ? 0 : 2);
-
- int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2;
-
- // Get the total # of RB's before modifying due to rb align
- int num_total_rbs_pre_rb_align_log2 = num_ses_log2 + num_rbs_log2;
-
- // Cap the pipe bits to block size
- int num_ses_data_log2 = num_ses_log2;
- cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2,
- num_samples_log2, pipe_interleave_log2, block_size_log2, num_pipes_log2 );
-
- // if not pipe aligned, set num_pipes_log2, num_ses_log2 to 0
- if( !is_pipe_aligned ) {
- num_pipes_log2 = 0;
- num_ses_data_log2 = 0;
- }
-
- // Get the correct data address and rb equation
- CoordEq dataaddr;
- Get_Data_Offset_Equation( dataaddr,
- (meta_linear) ? DATA_COLOR1D : data_type,
- bpp_log2, num_samples_log2, block_size_log2 );
-
-
- // if not rb aligned, set num_ses_log2/rbs_log2 to 0; note, this is done after generating the data equation
- if( !is_rb_aligned ) {
- num_ses_log2 = 0;
- num_rbs_log2 = 0;
- }
-
- // Get pipe and rb equations
- Get_Pipe_Equation( pipe_equation, dataaddr, pipe_interleave_log2,
- num_pipes_log2, block_size_log2, num_samples_log2, xmode, data_type );
-
- CoordEq& this_rbeq = rb_equation[num_ses_log2][num_rbs_log2];
-
- num_pipes_log2 = pipe_equation.getsize();
-
- if( meta_linear ) {
- dataaddr.copy( metaaddr );
- if( data_linear ) {
- if( is_pipe_aligned ) {
- // Remove the pipe bits
- metaaddr.shift( -num_pipes_log2, pipe_interleave_log2 );
- }
- // Divide by comp block size, which for linear (which is always color) is 256 B
- metaaddr.shift( -8 );
- if( is_pipe_aligned ) {
- // Put pipe bits back in
- metaaddr.shift( num_pipes_log2, pipe_interleave_log2 );
- int i;
- for( i=0; i<num_pipes_log2; i++ ) {
- pipe_equation[i].copyto(metaaddr[pipe_interleave_log2+i]);
- }
- }
- }
- metaaddr.shift( 1 );
- return;
- }
-
- int i, j, k, old_size, new_size;
- int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;
-
- // For non-color surfaces, compessed block size is always 8x8; for color, it's always a 256 bytes sized region
- int comp_blk_width_log2 = 3, comp_blk_height_log2 = 3, comp_blk_depth_log2 = 0;
- int comp_blk_size_log2 = 8;
-
- // For color surfaces, compute the comp block width, height, and depth
- // For non-color surfaces, compute the comp block size
- if( is_color ) {
- Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2, &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 );
- metadata_words_per_page_log2 -= num_samples_log2; // factor out num fragments for color surfaces
- }
- else {
- comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2;
- }
-
- // Compute meta block width and height
- int num_comp_blks_per_meta_blk;
- if (num_pipes_log2==0 && num_ses_log2==0 && num_rbs_log2==0) {
- num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
- }
- else {
- num_comp_blks_per_meta_blk = num_total_rbs_pre_rb_align_log2 + ((is_thick) ? 18 : 10);
-
- if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > 27+bpp_log2)
- num_comp_blks_per_meta_blk = 27+bpp_log2 - comp_blk_size_log2;
-
- if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk )
- num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
- }
-
- int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2;
- Get_Meta_Block_Screen_Space( num_comp_blks_per_meta_blk, is_thick, is_mipmapped, // mipmaps should be y-biased
- comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
- meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2 );
-
- // Make sure the metaaddr is cleared
- metaaddr.resize(0);
- metaaddr.resize(27);
-
- //------------------------------------------------------------------------------------------------------------------------
- // Use the growing square or growing cube order for thick as a starting point for the metadata address
- //------------------------------------------------------------------------------------------------------------------------
- if( is_thick ) {
- Coordinate cx( 'x', 0 );
- Coordinate cy( 'y', 0 );
- Coordinate cz( 'z', 0 );
- if(is_mipmapped) {
- metaaddr.mort3d( cy, cx, cz );
- } else {
- metaaddr.mort3d( cx, cy, cz );
- }
- }
- else {
- Coordinate cx( 'x', 0 );
- Coordinate cy( 'y', 0 );
- Coordinate cs;
-
- if(is_mipmapped) {
- metaaddr.mort2d( cy, cx, comp_frag_log2 );
- } else {
- metaaddr.mort2d( cx, cy, comp_frag_log2 );
- }
-
- //------------------------------------------------------------------------------------------------------------------------
- // Put the compressible fragments at the lsb
- // the uncompressible frags will be at the msb of the micro address
- //------------------------------------------------------------------------------------------------------------------------
- int s;
- for( s=0; s<comp_frag_log2; s++ ) {
- cs.set( 's', s );
- metaaddr[s].add(cs);
- }
- }
-
- // Keep a copy of the pipe and rb equations
- this_rbeq.copy( cur_rbeq );
- pipe_equation.copy( orig_pipe_equation );
-
- // filter out everything under the compressed block size
- co.set( 'x', comp_blk_width_log2 );
- metaaddr.Filter( '<', co, 0, 'x' );
- co.set( 'y', comp_blk_height_log2 );
- metaaddr.Filter( '<', co, 0, 'y' );
- co.set( 'z', comp_blk_depth_log2 );
- metaaddr.Filter( '<', co, 0, 'z' );
- // For non-color, filter out sample bits
- if( !is_color ) {
- co.set( 'x', 0 );
- metaaddr.Filter( '<', co, 0, 's' );
- }
-
- // filter out everything above the metablock size
- co.set( 'x', meta_block_width_log2-1 );
- metaaddr.Filter( '>', co, 0, 'x' );
- co.set( 'y', meta_block_height_log2-1 );
- metaaddr.Filter( '>', co, 0, 'y' );
- co.set( 'z', meta_block_depth_log2-1 );
- metaaddr.Filter( '>', co, 0, 'z' );
-
- // filter out everything above the metablock size for the channel bits
- co.set( 'x', meta_block_width_log2-1 );
- pipe_equation.Filter( '>', co, 0, 'x' );
- co.set( 'y', meta_block_height_log2-1 );
- pipe_equation.Filter( '>', co, 0, 'y' );
- co.set( 'z', meta_block_depth_log2-1 );
- pipe_equation.Filter( '>', co, 0, 'z' );
-
- // Make sure we still have the same number of channel bits
- if( pipe_equation.getsize() != static_cast<UINT_32>(num_pipes_log2) ) {
- // assert
- }
-
- // Loop through all channel and rb bits, and make sure these components exist in the metadata address
- for( i=0; i<num_pipes_log2; i++ ) {
- for( j=pipe_equation[i].getsize()-1; j>=0; j-- ) {
- if( !metaaddr.Exists( pipe_equation[i][j] ) ) {
- // assert
- }
- }
- }
- for( i=0; i<num_total_rbs_log2; i++ ) {
- for( j=cur_rbeq[i].getsize()-1; j>=0; j-- ) {
- if( !metaaddr.Exists( cur_rbeq[i][j] ) ) {
- // assert
- }
- }
- }
-
- // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
- int old_rb_bits_left = num_total_rbs_log2;
- for( i=0; i<num_total_rbs_log2; i++ ) {
- for(j=0; j<num_pipes_log2; j++ ) {
- if( cur_rbeq[i] == pipe_equation[j] ) {
- cur_rbeq[i].Clear();
- old_rb_bits_left--;
- // Mark which pipe bit caused the RB bit to be dropped
- pipe_mask |= (1 << j);
- }
- }
- }
-
- // Loop through each bit of the channel, get the smallest coordinate, and remove it from the metaaddr, and rb_equation
- for( i=0; i<num_pipes_log2; i++ ) {
- pipe_equation[i].getsmallest( co );
-
- old_size = metaaddr.getsize();
- metaaddr.Filter( '=', co );
- new_size = metaaddr.getsize();
- if( new_size != old_size-1 ) {
- // assert warning
- }
- pipe_equation.remove( co );
- for( j=0; j<num_total_rbs_log2; j++ ) {
- if( cur_rbeq[j].remove( co ) ) {
- // if we actually removed something from this bit, then add the remaining
- // channel bits, as these can be removed for this bit
- for( k=0; (unsigned)k<pipe_equation[i].getsize(); k++ ) {
- if( pipe_equation[i][k] != co ) {
- cur_rbeq[j].add( pipe_equation[i][k] );
- }
- }
- // if the rb bit is still empty, then we have to mark all pipe bits as affecting the RB
- if( cur_rbeq[j].getsize() == 0 ) {
- pipe_mask = (1 << num_pipes_log2) - 1;
- }
- }
- }
- }
-
- // Loop through the rb bits and see what remain; filter out the smallest coordinate if it remains
- int rb_bits_left = 0;
- for( i=0; i<num_total_rbs_log2; i++ ) {
- if( cur_rbeq[i].getsize() > 0 ) {
- rb_bits_left++;
- cur_rbeq[i].getsmallest( co );
- old_size = metaaddr.getsize();
- metaaddr.Filter( '=', co );
- new_size = metaaddr.getsize();
- if( new_size != old_size-1 ) {
- // assert warning
- }
- for( j=i+1; j<num_total_rbs_log2; j++ ) {
- if( cur_rbeq[j].remove( co ) ) {
- // if we actually removed something from this bit, then add the remaining
- // rb bits, as these can be removed for this bit
- for( k=0; (unsigned)k<cur_rbeq[i].getsize(); k++ ) {
- if( cur_rbeq[i][k] != co ) {
- cur_rbeq[j].add( cur_rbeq[i][k] );
- }
- }
- }
- }
- }
- }
-
- // capture the size of the metaaddr
- i = metaaddr.getsize();
- // resize to 49 bits...make this a nibble address
- metaaddr.resize(49);
- // Concatenate the macro address above the current address
- for( j=0; i<49; i++, j++ ) {
- co.set( 'm', j );
- metaaddr[i].add( co );
- }
-
- // Multiply by meta element size (in nibbles)
- if( is_color ) {
- metaaddr.shift( 1 ); // Byte size element
- } else if( data_type == DATA_Z_STENCIL ) {
- metaaddr.shift( 3 ); // 4 Byte size elements
- }
-
- //------------------------------------------------------------------------------------------------------------------------
- // Note the pipe_interleave_log2+1 is because address is a nibble address
- // Shift up from pipe interleave number of channel and rb bits left, and uncompressed fragments
- //------------------------------------------------------------------------------------------------------------------------
-
- metaaddr.shift( num_pipes_log2 + rb_bits_left + uncomp_frag_log2,
- pipe_interleave_log2+1 );
-
- // Put in the channel bits
- for( i=0; i<num_pipes_log2; i++ ) {
- orig_pipe_equation[i].copyto( metaaddr[pipe_interleave_log2+1 + i] );
- }
-
- // Put in remaining rb bits
- i = 0;
- for( j=0; j<rb_bits_left; i=(i+1) % num_total_rbs_log2 ) {
- if( cur_rbeq[i].getsize() > 0 ) {
- rb_equation[num_ses_log2][num_rbs_log2][i].copyto( metaaddr[pipe_interleave_log2+1 + num_pipes_log2 + j] );
- // Mark any rb bit we add in to the rb mask
- j++;
- }
- }
-
- //------------------------------------------------------------------------------------------------------------------------
- // Put in the uncompressed fragment bits
- //------------------------------------------------------------------------------------------------------------------------
- for( i=0; i<uncomp_frag_log2; i++ ) {
- co.set( 's', comp_frag_log2+i );
- metaaddr[pipe_interleave_log2+1 + num_pipes_log2 + rb_bits_left + i].add( co );
- }
-
-
- //------------------------------------------------------------------------------------------------------------------------
- // Check that the metadata SE bits match the data address
- //------------------------------------------------------------------------------------------------------------------------
- for( i=0; i<num_ses_data_log2; i++ ) {
- if(num_total_rbs_log2-num_ses_data_log2+i >= 0){
- if( metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] != dataaddr[ pipe_interleave_log2 + num_pipes_log2-num_ses_data_log2 + i ] ||
- metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] != rb_equation[num_ses_log2][num_rbs_log2][num_total_rbs_log2-num_ses_data_log2+i]) {
- //FIXME: Removed to prevent logs from growing large in size // cout << "Warning: GPU bit " << i << " differs from data addr or RB equation on " << data_name << title << endl;
- //FIXME: Removed to prevent logs from growing large in size // cout << " Data: " << dataaddr[ pipe_interleave_log2 + num_pipes_log2-num_ses_data_log2 + i ] << endl;
- //FIXME: Removed to prevent logs from growing large in size // cout << "MData: " << metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] << endl;
- //FIXME: Removed to prevent logs from growing large in size // cout << " RBeq: " << rb_equation[num_ses_log2][num_rbs_log2][num_total_rbs_log2-num_ses_data_log2+i] << endl;
- //FIXME: Removed to prevent logs from growing large in size // cout << " Pipe: " << orig_pipe_equation << endl;
- //FIXME: Removed to prevent logs from growing large in size // cout << " DEq: " << dataaddr << endl;
- }
- }
- }
-}
-
-long
-RB_MAP::get_meta_addr_calc( int x, int y, int z, int s,
- long surf_base, int element_bytes_log2, int num_samples_log2, int max_comp_frag_log2,
- long pitch, long slice,
- int max_mip,
-
- //int swizzle_mode,
- int xmode, int pipe_xor, int block_size_log2,
-
- /*int num_banks_log2,*/
- int num_pipes_log2,
- int pipe_interleave_log2,
-
- int meta_alignment,
- int dim_type,
- int x_mip_org, int y_mip_org, int z_mip_org,
-
- int num_ses_log2, int num_rbs_log2,
- /*bool se_affinity_enable, */
-
- int data_type,
-
- int l2_metablk_w, int l2_metablk_h, int l2_metablk_d,
- bool meta_linear
- )
-{
- int bpp_log2 = element_bytes_log2;
- int mip_base_x = x_mip_org;
- int mip_base_y = y_mip_org;
- int mip_base_z = z_mip_org;
-
- CoordEq metaaddr;
-
- //bool se_affinity_enable = false;
- //int max_pipe_bytes = std::max(1<<num_pipes_log2 * 1<<pipe_interleave_log2, 1024 * 1<<log2_element_bytes);
- //int max_banks_samples = std::max(1<<num_banks_log2, 1<<num_samples_log2);
- //int block_size_log2 = max(4096, max_pipe_bytes * max_bank_samples * 1<<num_ses_log2);
-
- bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
- bool is_color = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
- bool is_fmask = (data_type == DATA_FMASK);
-
- bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB);
- bool is_rb_aligned = (meta_alignment == META_ALIGN_RB) || (meta_alignment == META_ALIGN_PIPE_RB);
-
- if ( data_linear )
- meta_linear = true;
-
- if ( !data_linear && meta_linear)
- max_mip = 0;
-
- // Min metablock size if thick is 64KB, otherwise 4KB
- int min_meta_block_size_log2 = (is_thick) ? 16 : 12;
-
- // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
- int metadata_word_size_log2 = (is_fmask) ? -1 : ((is_color) ? 0 : 2);
- int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2;
-
- int num_ses_data_log2 = num_ses_log2;
- int block_size_data_log2 = block_size_log2;
- int num_pipes_data_log2 = num_pipes_log2;
-
- //int num_banks_data_log2 = num_banks_log2;
- cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2, num_samples_log2, pipe_interleave_log2, block_size_data_log2, num_pipes_data_log2/*, num_banks_data_log2 */);
-
- // Get the correct data address and rb equation
- CoordEq dataaddr;
- Get_Data_Offset_Equation( dataaddr, data_type, bpp_log2, num_samples_log2, block_size_data_log2 );
-
- get_meta_eq( metaaddr, max_mip, num_ses_log2, num_rbs_log2, num_pipes_log2, /*num_banks_log2,*/ block_size_log2,
- bpp_log2, num_samples_log2, max_comp_frag_log2, pipe_interleave_log2, xmode,
- data_type, meta_alignment, meta_linear);
- // For non-color surfaces, compessed block size is always 8x8; for color, it's always a 256 bytes sized region
- int comp_blk_width_log2 = 3, comp_blk_height_log2 = 3, comp_blk_depth_log2 = 0;
- int comp_blk_size_log2 = 8;
-
- if ( is_color ){
- Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2, &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 );
- metadata_words_per_page_log2 -= num_samples_log2; // factor out num fragments for color surfaces
- }
- else {
- comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2;
- }
-
- // Compute meta block width and height
- int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;
- int num_comp_blks_per_meta_blk;
- if((!is_pipe_aligned || num_pipes_log2==0) && (!is_rb_aligned || (num_ses_log2==0 && num_rbs_log2==0))) {
- num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
- }
- else {
- num_comp_blks_per_meta_blk = num_total_rbs_log2 + ((is_thick) ? 18 : 10);
- if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > 27+bpp_log2) num_comp_blks_per_meta_blk = 27+bpp_log2 - comp_blk_size_log2;
- if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk )
- num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
- }
-
- int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2;
-
- //@@todo kr missing meta_block_width*
-
- // Get the data block size
- int data_block_width_log2, data_block_height_log2, data_block_depth_log2;
-
- Get_Meta_Block_Screen_Space( block_size_log2 - comp_blk_size_log2,
- is_thick, true,
- comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
- data_block_width_log2, data_block_height_log2, data_block_depth_log2 );
-
- meta_block_width_log2 = l2_metablk_w;
- meta_block_height_log2 = l2_metablk_h;
- meta_block_depth_log2 = l2_metablk_d;
-
- int meta_x = mip_base_x + x ;
- int meta_y = mip_base_y + y ;
- int meta_z = mip_base_z + z ;
-
- if( meta_linear ){
- if(!data_linear) {
- // Tiled data, linear metadata
- meta_x = meta_x >> comp_blk_width_log2;
- meta_y = meta_y >> comp_blk_height_log2;
- meta_z = meta_z >> comp_blk_depth_log2;
- pitch = pitch >> comp_blk_width_log2;
- slice = slice >> (comp_blk_width_log2 + comp_blk_height_log2);
- }
- else{
- meta_x = meta_x << bpp_log2;
- meta_y = meta_y << bpp_log2;
- meta_z = meta_z << bpp_log2;
- }
- }
- else{
- meta_x = meta_x >> meta_block_width_log2;
- meta_y = meta_y >> meta_block_height_log2;
- meta_z = meta_z >> meta_block_depth_log2;
-
- pitch = pitch >> meta_block_width_log2;
- slice = slice >> (meta_block_width_log2 + meta_block_height_log2);
- }
-
- long macroaddr = (long)meta_x + (long)meta_y*(long)pitch + (long)meta_z*(long)slice;
-
- int mip_tail_x, mip_tail_y, mip_tail_z;
- mip_tail_x = mip_base_x & ((1 << meta_block_width_log2 )-1);
- mip_tail_y = mip_base_y & ((1 << meta_block_height_log2)-1);
- mip_tail_z = mip_base_z & ((1 << meta_block_depth_log2)-1);
-
- int mip_x = x + mip_tail_x;
- int mip_y = y + mip_tail_y;
- int mip_z = z + mip_tail_z;
-
- // the pipe_interleave_log2+1 is because we are dealing with nibble addresses
- long pipe_xor_mask = (pipe_xor & ((1 << num_pipes_data_log2)-1)) << (pipe_interleave_log2+1);
-
- // shift surf_base to make it a nibble address
- long meta_offset_from_base_nibble_address = metaaddr.solve( mip_x, mip_y, mip_z, s, macroaddr );
-
- long address = (surf_base << 1) + (meta_offset_from_base_nibble_address ^ pipe_xor_mask);
-
- return address;
-}
-
-#if 0
-long
-RB_MAP::get_meta_addr( int x, int y, int z, int s, int mip,
- int surf_width, int surf_height, int surf_depth, int lpitch,
- long surf_base, int pipe_xor, int max_mip,
- int num_ses_log2, int num_rbs_log2, int num_pipes_log2,
- int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2,
- int pipe_interleave_log2, int xmode, int data_type, int meta_alignment, bool meta_linear)
-{
- CoordEq metaaddr;
-
- bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR );
- bool is_color = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED );
- bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z );
- bool is_fmask = (data_type == DATA_FMASK);
-
- bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB);
- bool is_rb_aligned = (meta_alignment == META_ALIGN_RB) || (meta_alignment == META_ALIGN_PIPE_RB);
-
- bool is_mipmapped = (max_mip > 0) ? true : false;
-
- if( data_linear ) meta_linear = true;
- // Don't allow mipmapping on the tiled data, meta linear case
- // or if we have linear 2d/3d surface
-
- #ifdef ADDRESS__LPITCH_DISABLE__0
- if( (!data_linear && meta_linear) || (data_type == DATA_COLOR2D_LINEAR) ) max_mip = 0;
- #else
- if( !data_linear && meta_linear) max_mip = 0;
- #endif
-
- // Min metablock size if thick is 64KB, otherwise 4KB
- int min_meta_block_size_log2 = (is_thick) ? 16 : 12;
-
-
- // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil
- int metadata_word_size_log2 = (is_fmask) ? -1 : ((is_color) ? 0 : 2);
- int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2;
-
- // Cap the pipe bits to block size
- int num_ses_data_log2 = num_ses_log2;
- int block_size_data_log2 = block_size_log2;
- int num_pipes_data_log2 = num_pipes_log2;
-
- cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2, num_samples_log2, pipe_interleave_log2, block_size_data_log2, num_pipes_data_log2 );
-
- // Get the correct data address and rb equation
- CoordEq dataaddr;
- Get_Data_Offset_Equation( dataaddr, data_type, bpp_log2, num_samples_log2, block_size_data_log2 );
-
- get_meta_eq( metaaddr, max_mip, num_ses_log2, num_rbs_log2, num_pipes_log2, block_size_log2,
- bpp_log2, num_samples_log2, max_comp_frag_log2, pipe_interleave_log2, xmode, data_type,
- meta_alignment, meta_linear);
-
- // For non-color surfaces, compessed block size is always 8x8; for color, it's always a 256 bytes sized region
- int comp_blk_width_log2 = 3, comp_blk_height_log2 = 3, comp_blk_depth_log2 = 0;
- int comp_blk_size_log2 = 8;
-
- if ( is_color ) {
- Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2, &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 );
- metadata_words_per_page_log2 -= num_samples_log2; // factor out num fragments for color surfaces
- } else {
- comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2;
- }
-
- // Compute meta block width and height
- int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2;
-
- int num_comp_blks_per_meta_blk;
- if((!is_pipe_aligned || num_pipes_log2==0) && (!is_rb_aligned || (num_ses_log2==0 && num_rbs_log2==0))) {
- num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
- }
- else {
- num_comp_blks_per_meta_blk = num_total_rbs_log2 + ((is_thick) ? 18 : 10);
-
- if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > 27+bpp_log2) num_comp_blks_per_meta_blk = 27+bpp_log2 - comp_blk_size_log2;
-
- if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk )
- num_comp_blks_per_meta_blk = metadata_words_per_page_log2;
- }
-
- int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2;
-
-
- Get_Meta_Block_Screen_Space( num_comp_blks_per_meta_blk, is_thick, is_mipmapped,
- comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
- meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2 );
-
- // Get the data block size
- int data_block_width_log2, data_block_height_log2, data_block_depth_log2;
-
- Get_Meta_Block_Screen_Space( block_size_log2 - comp_blk_size_log2, is_thick, true,
- comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2,
- data_block_width_log2, data_block_height_log2, data_block_depth_log2 );
-
- int meta_x, meta_y, meta_z;
- int meta_surf_width = surf_width;
- int meta_surf_height = surf_height;
- int meta_surf_depth = surf_depth;
-
- int mip_base_x=0, mip_base_y=0, mip_base_z=0;
- get_mip_coord( mip_base_x, mip_base_y, mip_base_z, mip,
- meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2,
- data_block_width_log2, data_block_height_log2,
- meta_surf_width, meta_surf_height, meta_surf_depth, lpitch, max_mip,
- data_type, bpp_log2, meta_linear );
-
- meta_x = mip_base_x + x;
- meta_y = mip_base_y + y;
- meta_z = mip_base_z + z;
-
- if( meta_linear ) {
- if( !data_linear ) {
- // Tiled data, linear metadata
- meta_x = meta_x >> comp_blk_width_log2;
- meta_y = meta_y >> comp_blk_height_log2;
- meta_z = meta_z >> comp_blk_depth_log2;
- meta_surf_width = meta_surf_width >> comp_blk_width_log2;
- meta_surf_height = meta_surf_height >> comp_blk_height_log2;
- }
- else{
- meta_x = meta_x << bpp_log2;
- meta_y = meta_y << bpp_log2;
- meta_z = meta_z << bpp_log2;
- }
- } else {
- meta_x = meta_x >> meta_block_width_log2;
- meta_y = meta_y >> meta_block_height_log2;
- meta_z = meta_z >> meta_block_depth_log2;
- meta_surf_width = meta_surf_width >> meta_block_width_log2;
- meta_surf_height = meta_surf_height >> meta_block_height_log2;
- }
-
- long macroaddr = (long)meta_x + (long)meta_y*(long)meta_surf_width + (long)meta_z*(long)meta_surf_width*(long)meta_surf_height;
-
- int mip_tail_x, mip_tail_y, mip_tail_z;
- mip_tail_x = mip_base_x & ((1 << meta_block_width_log2 )-1);
- mip_tail_y = mip_base_y & ((1 << meta_block_height_log2)-1);
- mip_tail_z = mip_base_z & ((1 << meta_block_depth_log2)-1);
-
- int mip_x = x + mip_tail_x;
- int mip_y = y + mip_tail_y;
- int mip_z = z + mip_tail_z;
-
- // the pipe_interleave_log2+1 is because we are dealing with nibble addresses
- long pipe_xor_mask = (pipe_xor & ((1 << num_pipes_data_log2)-1)) << (pipe_interleave_log2+1);
-
- // shift surf_base to make it a nibble address
- long address = (surf_base << 1) + (metaaddr.solve( mip_x, mip_y, mip_z, s, macroaddr ) ^ pipe_xor_mask);
-
- return address;
-}
-#endif
-
-void
-RB_MAP::Initialize()
-{
- int num_se_log2, num_rb_per_se_log2;
- for( num_se_log2=0; num_se_log2<5; num_se_log2++ ) {
- for( num_rb_per_se_log2=0; num_rb_per_se_log2<3; num_rb_per_se_log2++ ) {
- Get_RB_Equation( rb_equation[num_se_log2][num_rb_per_se_log2], num_se_log2, num_rb_per_se_log2 );
- }
- }
-
- int pix_size_log2, num_samples_log2;
- for( pix_size_log2=0; pix_size_log2<4; pix_size_log2++ ) {
- for( num_samples_log2=0; num_samples_log2<4; num_samples_log2++ ) {
- Get_Data_Offset_Equation( zaddr[pix_size_log2][num_samples_log2], DATA_Z_STENCIL, pix_size_log2, num_samples_log2, 16 );
- }
- }
-
- for( pix_size_log2=0; pix_size_log2<5; pix_size_log2++ ) {
- for( num_samples_log2=0; num_samples_log2<4; num_samples_log2++ ) {
- Get_Data_Offset_Equation( caddr[pix_size_log2][num_samples_log2], DATA_COLOR2D, pix_size_log2, num_samples_log2, 16 );
- }
- }
-
- for( pix_size_log2=0; pix_size_log2<5; pix_size_log2++ ) {
- Get_Data_Offset_Equation( c3addr[pix_size_log2][0], DATA_COLOR3D_S, pix_size_log2, 0, 16 );
- Get_Data_Offset_Equation( c3addr[pix_size_log2][1], DATA_COLOR3D_Z, pix_size_log2, 0, 16 );
- }
-}
-
diff --git a/src/amd/addrlib/gfx9/rbmap.h b/src/amd/addrlib/gfx9/rbmap.h
deleted file mode 100644
index 89c8922d3fe..00000000000
--- a/src/amd/addrlib/gfx9/rbmap.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright © 2017 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
- * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- */
-
-// This class RB_MAP contains the top-level calculation functions which are used to generate rb id map based rb id equations
-
-#ifndef __RB_MAP_H
-#define __RB_MAP_H
-
-#include "coord.h"
-
-class RB_MAP
-{
-public:
-
- enum MAX_VALUES {
- MAX_SES_LOG2 = 3,
- MAX_RBS_LOG2 = 2
- };
-
- enum COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2 {
- COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2_2D = 10,
- COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2_3D = 18
- };
-
- RB_MAP(void);
-
- void Get_Comp_Block_Screen_Space( CoordEq& addr, int bytes_log2, int* w, int* h, int* d = NULL);
-
- void Get_Meta_Block_Screen_Space( int num_comp_blocks_log2, bool is_thick, bool y_biased,
- int comp_block_width_log2, int comp_block_height_log2, int comp_block_depth_log2,
- int& meta_block_width_log2, int& meta_block_height_log2, int& meta_block_depth_log2 );
- void cap_pipe( int xmode, bool is_thick, int& num_ses_log2, int bpp_log2, int num_samples_log2, int pipe_interleave_log2,
- int& block_size_log2, int& num_pipes_log2 );
-
- void Get_Data_Offset_Equation( CoordEq& data_eq, int data_type, int bpp_log2, int num_samples_log2, int block_size_log2 );
-
- void Get_RB_Equation( CoordEq& rb_equation, int num_ses_log2, int num_rbs_log2 );
-
- void Get_Pipe_Equation( CoordEq& pipe_equation, CoordEq& addr,
- int pipe_interleave_log2,
- int num_pipes_log2,
- int block_size_log2,
- int num_samples_log2,
- int xmode, int data_type
- );
-
- void get_meta_miptail_coord( int& x, int& y, int& z, int mip_in_tail, int blk_width_log2, int blk_height_log2, int blk_depth_log2 );
-
- void get_mip_coord( int& x, int& y, int& z, int mip,
- int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2,
- int data_blk_width_log2, int data_blk_height_log2,
- int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip,
- int data_type, int bpp_log2, bool meta_linear );
-
- void get_mip_coord_linear( int& x, int& y, int& z, int mip, int data_blk_width_log2, int data_blk_height_log2,
- int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip, int data_type, int bpp_log2 );
-
- void get_mip_coord_nonlinear( int& x, int& y, int& z, int mip, int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2,
- int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip, int data_type );
-
- void get_meta_eq( CoordEq& metaaddr, int max_mip, int num_ses_log2, int num_rbs_log2, int &num_pipes_log2,
- int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2,
- int pipe_interleave_log2, int xmode, int data_type, int meta_alignment, bool meta_linear);
-
-#if 0
- long get_meta_addr( int x, int y, int z, int s, int mip,
- int surf_width, int surf_height, int surf_depth, int epitch,
- long surf_base, int pipe_xor, int max_mip,
- int num_ses_log2, int num_rbs_log2, int num_pipes_log2,
- int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2,
- int pipe_interleave_log2, int xmode, int data_type, int meta_alignment, bool meta_linear);
-#endif
-
- long get_meta_addr_calc( int x, int y, int z, int s,
- long surf_base, int element_bytes_log2, int num_samples_log2, int max_comp_frag_log2,
- long pitch, long slice,
- int max_mip,
- //int swizzle_mode,
- int xmode, int pipe_xor, int block_size_log2,
- /*int num_banks_log2,*/ int num_pipes_log2,
- int pipe_interleave_log2, int meta_alignment, int dim_type, int x_mip_org, int y_mip_org,
- int z_mip_org, int num_ses_log2, int num_rbs_log2, /*bool se_affinity_enable,*/ int data_type,
- int l2_metablk_w, int l2_metablk_h, int l2_metablk_d, bool meta_linear);
-
- void Initialize(void);
-
-public:
- enum XOR_RANGE {
- NONE = 0,
- XOR = 1,
- PRT = 2
- };
-
-
- enum DATA_TYPE_ENUM {
- DATA_COLOR1D,
- DATA_COLOR2D,
- DATA_COLOR3D_S,
- DATA_COLOR3D_Z,
- DATA_Z_STENCIL,
- DATA_FMASK,
- DATA_COLOR2D_LINEAR,
- DATA_COLOR3D_D_NOT_USED // should not be used; use COLOR2D instead
- };
-
- enum META_ALIGNMENT {
- META_ALIGN_NONE,
- META_ALIGN_PIPE,
- META_ALIGN_RB,
- META_ALIGN_PIPE_RB
- };
-
- CoordEq rb_equation[MAX_SES_LOG2+1][MAX_RBS_LOG2+1];
- CoordEq zaddr [4][4];
- CoordEq caddr [5][4];
- CoordEq c3addr[5][2];
-};
-
-#endif
diff --git a/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h b/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h
index 823710cc189..49cc65a7dcb 100644
--- a/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h
+++ b/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h
@@ -27,6 +27,14 @@
* of the Software.
*/
+#include "util/u_endian.h"
+
+#if defined(PIPE_ARCH_LITTLE_ENDIAN)
+#define LITTLEENDIAN_CPU
+#elif defined(PIPE_ARCH_BIG_ENDIAN)
+#define BIGENDIAN_CPU
+#endif
+
//
// Make sure the necessary endian defines are there.
//
diff --git a/src/amd/addrlib/inc/chip/r800/si_gb_reg.h b/src/amd/addrlib/inc/chip/r800/si_gb_reg.h
index cf67f602bdf..793edbc6280 100644
--- a/src/amd/addrlib/inc/chip/r800/si_gb_reg.h
+++ b/src/amd/addrlib/inc/chip/r800/si_gb_reg.h
@@ -27,6 +27,14 @@
* of the Software.
*/
+#include "util/u_endian.h"
+
+#if defined(PIPE_ARCH_LITTLE_ENDIAN)
+#define LITTLEENDIAN_CPU
+#elif defined(PIPE_ARCH_BIG_ENDIAN)
+#define BIGENDIAN_CPU
+#endif
+
//
// Make sure the necessary endian defines are there.
//
diff --git a/src/amd/addrlib/inc/lnx_common_defs.h b/src/amd/addrlib/inc/lnx_common_defs.h
deleted file mode 100644
index 61540f49b7e..00000000000
--- a/src/amd/addrlib/inc/lnx_common_defs.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright © 2014 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
- * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- */
-#ifndef _lnx_common_defs_h_
-#define _lnx_common_defs_h_
-
-#if DBG
-#include <stdarg.h> // We do not have any choice: need variable
- // number of parameters support for debug
- // build.
-#endif // #if DBG
-
-//
-// -------------- External functions from Linux kernel driver ----------------
-//
-// Note: The definitions/declararions below must match the original ones.
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef unsigned long __ke_size_t; // as it is defined in firegl_public.h
-typedef int __kernel_ptrdiff_t; // as it is defined in posix_types.h
-
-
-#if !defined(ATI_API_CALL)
-#define ATI_API_CALL __attribute__((regparm(0)))
-#endif
-
-extern void * ATI_API_CALL __ke_memset(void* s, int c, __ke_size_t count);
-extern void * ATI_API_CALL __ke_memcpy(void* d, const void* s, __ke_size_t count);
-extern ATI_API_CALL __ke_size_t __ke_strlen(const char *s);
-extern char* ATI_API_CALL __ke_strcpy(char* d, const char* s);
-extern char* ATI_API_CALL __ke_strncpy(char* d, const char* s, __ke_size_t count);
-extern void __ke_printk(const char* fmt, ...);
-
-extern int ATI_API_CALL __ke_snprintf(char* buf, __ke_size_t size, const char* fmt, ...);
-extern int ATI_API_CALL KCL_CopyFromUserSpace(void* to, const void* from, __ke_size_t size);
-extern int ATI_API_CALL KCL_CopyToUserSpace(void* to, const void* from, __ke_size_t size);
-#define __ke_copy_from_user KCL_CopyFromUserSpace
-#define __ke_copy_to_user KCL_CopyToUserSpace
-extern int ATI_API_CALL __ke_verify_area(int type, const void * addr, unsigned long size);
-
-extern unsigned long ATI_API_CALL KAS_GetTickCounter(void);
-extern unsigned long ATI_API_CALL KAS_GetTicksPerSecond(void);
-
-
-#if DBG
-extern int ATI_API_CALL __ke_vsnprintf(char *buf, __ke_size_t size, const char *fmt, va_list ap);
-#define vsnprintf(_dst, _size, _fmt, varg) __ke_snprintf(_dst, _size, _fmt, varg)
-#endif // #if DBG
-
-
-// Note: This function is not defined in firegl_public.h.
-void firegl_hardwareHangRecovery(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-//
-// -------------------------- C/C++ standard typedefs ----------------------------
-//
-#ifdef __SIZE_TYPE__
-typedef __SIZE_TYPE__ size_t;
-#else // #ifdef __SIZE_TYPE__
-typedef unsigned int size_t;
-#endif // #ifdef __SIZE_TYPE__
-
-#ifdef __PTRDIFF_TYPE__
-typedef __PTRDIFF_TYPE__ ptrdiff_t;
-#else // #ifdef __PTRDIFF_TYPE__
-typedef int ptrdiff_t;
-#endif // #ifdef __PTRDIFF_TYPE__
-
-#ifndef NULL
-#ifdef __cplusplus
-#define NULL __null
-#else
-#define NULL ((void *)0)
-#endif
-#endif
-
-
-//
-// ------------------------- C/C++ standard macros ---------------------------
-//
-
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) // as it is defined in stddef.h
-#define CHAR_BIT 8 // as it is defined in limits.h
-
-//
-// --------------------------------- C RTL -----------------------------------
-//
-
-#define memset(_p, _v, _n) __ke_memset(_p, _v, _n)
-#define memcpy(_d, _s, _n) __ke_memcpy(_d, _s, _n)
-#define strlen(_s) __ke_strlen(_s)
-#define strcpy(_d, _s) __ke_strcpy(_d, _s)
-#define strncpy(_d, _s, _n) __ke_strncpy(_d, _s, _n)
-// Note: C99 supports macros with variable number of arguments. GCC also supports this C99 feature as
-// C++ extension.
-#define snprintf(_dst, _size, _fmt, arg...) __ke_snprintf(_dst, _size, _fmt, ##arg)
-
-
-#endif // #ifdef _lnx_common_defs_h_
-
diff --git a/src/amd/addrlib/meson.build b/src/amd/addrlib/meson.build
index a6cad1207b0..62beb0ecbc1 100644
--- a/src/amd/addrlib/meson.build
+++ b/src/amd/addrlib/meson.build
@@ -38,11 +38,9 @@ files_addrlib = files(
'gfx9/coord.h',
'gfx9/gfx9addrlib.cpp',
'gfx9/gfx9addrlib.h',
- 'gfx9/rbmap.cpp',
- 'gfx9/rbmap.h',
+ 'amdgpu_asic_addr.h',
'inc/chip/gfx9/gfx9_gb_reg.h',
'inc/chip/r800/si_gb_reg.h',
- 'inc/lnx_common_defs.h',
'r800/chip/si_ci_vi_merged_enum.h',
'r800/ciaddrlib.cpp',
'r800/ciaddrlib.h',
diff --git a/src/amd/addrlib/r800/ciaddrlib.cpp b/src/amd/addrlib/r800/ciaddrlib.cpp
index 4f67350c82f..322dcf64ffd 100644
--- a/src/amd/addrlib/r800/ciaddrlib.cpp
+++ b/src/amd/addrlib/r800/ciaddrlib.cpp
@@ -35,15 +35,7 @@
#include "si_gb_reg.h"
-#include "si_ci_vi_merged_enum.h"
-
-#if BRAHMA_BUILD
-#include "amdgpu_id.h"
-#else
-#include "ci_id.h"
-#include "kv_id.h"
-#include "vi_id.h"
-#endif
+#include "amdgpu_asic_addr.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -188,7 +180,6 @@ CiLib::CiLib(const Client* pClient)
m_allowNonDispThickModes(FALSE)
{
m_class = CI_ADDRLIB;
- memset(&m_settings, 0, sizeof(m_settings));
}
/**
@@ -450,7 +441,6 @@ BOOL_32 CiLib::HwlInitGlobalParams(
// read the correct pipes from tile mode table
if (m_settings.isHawaii)
{
- // Hawaii has 16-pipe, see GFXIP_Config_Summary.xls
m_pipes = 16;
}
else if (m_settings.isBonaire || m_settings.isSpectre)
@@ -600,9 +590,9 @@ INT_32 CiLib::HwlPostCheckTileIndex(
****************************************************************************************************
*/
ADDR_E_RETURNCODE CiLib::HwlSetupTileCfg(
- UINT_32 bpp, ///< [in] Bits per pixel
- INT_32 index, ///< [in] Tile index
- INT_32 macroModeIndex, ///< [in] Index in macro tile mode table(CI)
+ UINT_32 bpp, ///< Bits per pixel
+ INT_32 index, ///< Tile index
+ INT_32 macroModeIndex, ///< Index in macro tile mode table(CI)
ADDR_TILEINFO* pInfo, ///< [out] Tile Info
AddrTileMode* pMode, ///< [out] Tile mode
AddrTileType* pType ///< [out] Tile type
@@ -711,13 +701,12 @@ ADDR_E_RETURNCODE CiLib::HwlComputeSurfaceInfo(
ADDR_E_RETURNCODE retCode = SiLib::HwlComputeSurfaceInfo(pIn, pOut);
-
if ((pIn->mipLevel > 0) &&
(pOut->tcCompatible == TRUE) &&
(pOut->tileMode != pIn->tileMode) &&
(m_settings.isVolcanicIslands == TRUE))
{
- CheckTcCompatibility(pOut->pTileInfo, pIn->bpp, pOut->tileMode, pOut->tileType, pOut);
+ pOut->tcCompatible = CheckTcCompatibility(pOut->pTileInfo, pIn->bpp, pOut->tileMode, pOut->tileType, pOut);
}
if (pOut->macroModeIndex == TileIndexNoMacroIndex)
@@ -1572,7 +1561,7 @@ VOID CiLib::HwlSetupTileInfo(
if (flags.tcCompatible)
{
- CheckTcCompatibility(pTileInfo, bpp, tileMode, inTileType, pOut);
+ flags.tcCompatible = CheckTcCompatibility(pTileInfo, bpp, tileMode, inTileType, pOut);
}
pOut->tcCompatible = flags.tcCompatible;
@@ -2271,19 +2260,21 @@ BOOL_32 CiLib::DepthStencilTileCfgMatch(
* CiLib::DepthStencilTileCfgMatch
*
* @brief
-* Turn off TcCompatible if requirement is not met
+* Check if tc compatibility is available
* @return
-* N/A
+* TRUE if tc compatibility is available, FALSE otherwise
****************************************************************************************************
*/
-VOID CiLib::CheckTcCompatibility(
- const ADDR_TILEINFO* pTileInfo, ///< [in] input tile info
- UINT_32 bpp, ///< [in] Bits per pixel
- AddrTileMode tileMode, ///< [in] input tile mode
- AddrTileType tileType, ///< [in] input tile type
- ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] out structure
+BOOL_32 CiLib::CheckTcCompatibility(
+ const ADDR_TILEINFO* pTileInfo, ///< [in] input tile info
+ UINT_32 bpp, ///< [in] Bits per pixel
+ AddrTileMode tileMode, ///< [in] input tile mode
+ AddrTileType tileType, ///< [in] input tile type
+ const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in] output surf info
) const
{
+ BOOL_32 tcCompatible = TRUE;
+
if (IsMacroTiled(tileMode))
{
if (tileType != ADDR_DEPTH_SAMPLE_ORDER)
@@ -2309,7 +2300,7 @@ VOID CiLib::CheckTcCompatibility(
if (m_rowSize < colorTileSplit)
{
- pOut->tcCompatible = FALSE;
+ tcCompatible = FALSE;
}
}
}
@@ -2317,8 +2308,10 @@ VOID CiLib::CheckTcCompatibility(
else
{
// Client should not enable tc compatible for linear and 1D tile modes.
- pOut->tcCompatible = FALSE;
+ tcCompatible = FALSE;
}
+
+ return tcCompatible;
}
} // V1
diff --git a/src/amd/addrlib/r800/ciaddrlib.h b/src/amd/addrlib/r800/ciaddrlib.h
index 3c838dfc53c..c11b678574f 100644
--- a/src/amd/addrlib/r800/ciaddrlib.h
+++ b/src/amd/addrlib/r800/ciaddrlib.h
@@ -44,37 +44,6 @@ namespace V1
/**
****************************************************************************************************
-* @brief CI specific settings structure.
-****************************************************************************************************
-*/
-struct CIChipSettings
-{
- struct
- {
- UINT_32 isSeaIsland : 1;
- UINT_32 isBonaire : 1;
- UINT_32 isKaveri : 1;
- UINT_32 isSpectre : 1;
- UINT_32 isSpooky : 1;
- UINT_32 isKalindi : 1;
- // Hawaii is GFXIP 7.2
- UINT_32 isHawaii : 1;
-
- // VI
- UINT_32 isVolcanicIslands : 1;
- UINT_32 isIceland : 1;
- UINT_32 isTonga : 1;
- UINT_32 isFiji : 1;
- UINT_32 isPolaris10 : 1;
- UINT_32 isPolaris11 : 1;
- UINT_32 isPolaris12 : 1;
- // VI fusion (Carrizo)
- UINT_32 isCarrizo : 1;
- };
-};
-
-/**
-****************************************************************************************************
* @brief This class is the CI specific address library
* function set.
****************************************************************************************************
@@ -208,9 +177,8 @@ private:
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
- VOID CheckTcCompatibility(
- const ADDR_TILEINFO* pTileInfo, UINT_32 bpp, AddrTileMode tileMode,
- AddrTileType tileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+ BOOL_32 CheckTcCompatibility(const ADDR_TILEINFO* pTileInfo, UINT_32 bpp, AddrTileMode tileMode,
+ AddrTileType tileType, const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
static const UINT_32 MacroTileTableSize = 16;
static const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2;
@@ -221,8 +189,6 @@ private:
ADDR_TILEINFO m_macroTileTable[MacroTileTableSize];
UINT_32 m_noOfMacroEntries;
BOOL_32 m_allowNonDispThickModes;
-
- CIChipSettings m_settings;
};
} // V1
diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp
index 7affdecbf02..99aa6cf4cdb 100644
--- a/src/amd/addrlib/r800/egbaddrlib.cpp
+++ b/src/amd/addrlib/r800/egbaddrlib.cpp
@@ -739,13 +739,12 @@ BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMicroTiled(
AdjustPitchAlignment(flags, pPitchAlign);
- // ECR#393489
- // Workaround 2 for 1D tiling - There is HW bug for Carrizo
+ // Workaround 2 for 1D tiling - There is a HW bug for Carrizo,
// where it requires the following alignments for 1D tiling.
if (flags.czDispCompatible && (mipLevel == 0))
{
*pBaseAlign = PowTwoAlign(*pBaseAlign, 4096); //Base address MOD 4096 = 0
- *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0
+ *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0
}
// end Carrizo workaround for 1D tilling
@@ -1091,6 +1090,8 @@ AddrTileMode EgBasedLib::ComputeSurfaceMipLevelTileMode(
ADDR_TILEINFO* pTileInfo ///< [in] ptr to bank structure
) const
{
+ UINT_64 bytesPerSlice;
+ (void)bytesPerSlice;
UINT_32 bytesPerTile;
AddrTileMode expTileMode = baseTileMode;
@@ -1100,6 +1101,7 @@ AddrTileMode EgBasedLib::ComputeSurfaceMipLevelTileMode(
//
// Compute the size of a slice.
//
+ bytesPerSlice = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp * numSamples);
bytesPerTile = BITS_TO_BYTES(MicroTilePixels * microTileThickness * NextPow2(bpp) * numSamples);
//
@@ -1329,12 +1331,6 @@ UINT_64 EgBasedLib::DispatchComputeSurfaceAddrFromCoord(
UINT_32* pBitPosition = &pOut->bitPosition;
UINT_64 addr;
-#if ADDR_AM_BUILD
- UINT_32 addr5Bit = 0;
- UINT_32 addr5Swizzle = pIn->addr5Swizzle;
- BOOL_32 is32ByteTile = pIn->is32ByteTile;
-#endif
-
// ADDR_DEPTH_SAMPLE_ORDER = non-disp + depth-sample-order
if (microTileType == ADDR_DEPTH_SAMPLE_ORDER)
{
@@ -1439,23 +1435,6 @@ UINT_64 EgBasedLib::DispatchComputeSurfaceAddrFromCoord(
break;
}
-#if ADDR_AM_BUILD
- if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
- {
- if (addr5Swizzle && isDepthSampleOrder && is32ByteTile)
- {
- UINT_32 tx = x >> 3;
- UINT_32 ty = y >> 3;
- UINT_32 tileBits = ((ty&0x3) << 2) | (tx&0x3);
-
- tileBits = tileBits & addr5Swizzle;
- addr5Bit = XorReduce(tileBits, 4);
-
- addr = addr | static_cast<UINT_64>(addr5Bit << 5);
- }
- }
-#endif
-
return addr;
}
@@ -2751,6 +2730,8 @@ ADDR_E_RETURNCODE EgBasedLib::HwlComputeBaseSwizzle(
{ 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 }, // ADDR_SURF_16_BANK
};
+ UINT_32 pipes = HwlGetPipes(pTileInfo);
+ (void)pipes;
UINT_32 banks = pTileInfo ? pTileInfo->banks : 2;
UINT_32 hwNumBanks;
@@ -3379,20 +3360,6 @@ ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskAddrFromCoord(
{
ADDR_E_RETURNCODE retCode = ADDR_OK;
-#if ADDR_AM_BUILD
- if ((pIn->x > pIn->pitch) ||
- (pIn->y > pIn->height) ||
- (pIn->numSamples > m_maxSamples) ||
- (pIn->sample >= m_maxSamples))
- {
- retCode = ADDR_INVALIDPARAMS;
- }
- else
- {
- pOut->addr = DispatchComputeFmaskAddrFromCoord(pIn, pOut);
- }
-#endif
-
return retCode;
}
@@ -3412,618 +3379,9 @@ ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskCoordFromAddr(
{
ADDR_E_RETURNCODE retCode = ADDR_OK;
-#if ADDR_AM_BUILD
- if ((pIn->bitPosition >= 8) ||
- (pIn->numSamples > m_maxSamples))
- {
- retCode = ADDR_INVALIDPARAMS;
- }
- else
- {
- DispatchComputeFmaskCoordFromAddr(pIn, pOut);
- }
-#endif
-
return retCode;
}
-#if ADDR_AM_BUILD
-/**
-****************************************************************************************************
-* EgBasedLib::DispatchComputeFmaskAddrFromCoord
-*
-* @brief
-* Computes the FMASK address and bit position from a coordinate.
-* @return
-* The byte address
-****************************************************************************************************
-*/
-UINT_64 EgBasedLib::DispatchComputeFmaskAddrFromCoord(
- const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
- ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
- ) const
-{
- UINT_32 x = pIn->x;
- UINT_32 y = pIn->y;
- UINT_32 slice = pIn->slice;
- UINT_32 sample = pIn->sample;
- UINT_32 plane = pIn->plane;
- UINT_32 pitch = pIn->pitch;
- UINT_32 height = pIn->height;
- UINT_32 numSamples = pIn->numSamples;
- AddrTileMode tileMode = pIn->tileMode;
- BOOL_32 ignoreSE = pIn->ignoreSE;
- ADDR_TILEINFO* pTileInfo = pIn->pTileInfo;
- BOOL_32 resolved = pIn->resolved;
-
- UINT_32* pBitPosition = &pOut->bitPosition;
- UINT_64 addr = 0;
-
- ADDR_ASSERT(numSamples > 1);
- ADDR_ASSERT(Thickness(tileMode) == 1);
-
- switch (tileMode)
- {
- case ADDR_TM_1D_TILED_THIN1:
- addr = ComputeFmaskAddrFromCoordMicroTiled(x,
- y,
- slice,
- sample,
- plane,
- pitch,
- height,
- numSamples,
- tileMode,
- resolved,
- pBitPosition);
- break;
- case ADDR_TM_2D_TILED_THIN1: //fall through
- case ADDR_TM_3D_TILED_THIN1:
- UINT_32 pipeSwizzle;
- UINT_32 bankSwizzle;
-
- if (m_configFlags.useCombinedSwizzle)
- {
- ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
- &bankSwizzle, &pipeSwizzle);
- }
- else
- {
- pipeSwizzle = pIn->pipeSwizzle;
- bankSwizzle = pIn->bankSwizzle;
- }
-
- addr = ComputeFmaskAddrFromCoordMacroTiled(x,
- y,
- slice,
- sample,
- plane,
- pitch,
- height,
- numSamples,
- tileMode,
- pipeSwizzle,
- bankSwizzle,
- ignoreSE,
- pTileInfo,
- resolved,
- pBitPosition);
- break;
- default:
- *pBitPosition = 0;
- break;
- }
-
- return addr;
-}
-
-/**
-****************************************************************************************************
-* EgBasedLib::ComputeFmaskAddrFromCoordMicroTiled
-*
-* @brief
-* Computes the FMASK address and bit position from a coordinate for 1D tilied (micro
-* tiled)
-* @return
-* The byte address
-****************************************************************************************************
-*/
-UINT_64 EgBasedLib::ComputeFmaskAddrFromCoordMicroTiled(
- UINT_32 x, ///< [in] x coordinate
- UINT_32 y, ///< [in] y coordinate
- UINT_32 slice, ///< [in] slice index
- UINT_32 sample, ///< [in] sample number
- UINT_32 plane, ///< [in] plane number
- UINT_32 pitch, ///< [in] surface pitch in pixels
- UINT_32 height, ///< [in] surface height in pixels
- UINT_32 numSamples, ///< [in] number of samples
- AddrTileMode tileMode, ///< [in] tile mode
- BOOL_32 resolved, ///< [in] TRUE if this is for resolved fmask
- UINT_32* pBitPosition ///< [out] pointer to returned bit position
- ) const
-{
- UINT_64 addr = 0;
- UINT_32 effectiveBpp;
- UINT_32 effectiveSamples;
-
- //
- // 2xAA use the same layout as 4xAA
- //
- if (numSamples == 2)
- {
- numSamples = 4;
- }
-
- //
- // Compute the number of planes.
- //
- if (resolved == FALSE)
- {
- effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
- effectiveBpp = numSamples;
-
- //
- // Compute the address just like a color surface with numSamples bits per element and
- // numPlanes samples.
- //
- addr = ComputeSurfaceAddrFromCoordMicroTiled(x,
- y,
- slice,
- plane, // sample
- effectiveBpp,
- pitch,
- height,
- effectiveSamples,
- tileMode,
- ADDR_NON_DISPLAYABLE,
- FALSE,
- pBitPosition);
-
- //
- // Compute the real bit position. Each (sample, plane) is stored with one bit per sample.
- //
-
- //
- // Compute the pixel index with in the micro tile
- //
- UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x % 8,
- y % 8,
- slice,
- 1,
- tileMode,
- ADDR_NON_DISPLAYABLE);
-
- *pBitPosition = ((pixelIndex * numSamples) + sample) & (BITS_PER_BYTE-1);
-
- UINT_64 bitAddr = BYTES_TO_BITS(addr) + *pBitPosition;
-
- addr = bitAddr / 8;
- }
- else
- {
- effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
- effectiveSamples = 1;
-
- //
- // Compute the address just like a color surface with numSamples bits per element and
- // numPlanes samples.
- //
- addr = ComputeSurfaceAddrFromCoordMicroTiled(x,
- y,
- slice,
- sample,
- effectiveBpp,
- pitch,
- height,
- effectiveSamples,
- tileMode,
- ADDR_NON_DISPLAYABLE,
- TRUE,
- pBitPosition);
- }
-
- return addr;
-}
-
-/**
-****************************************************************************************************
-* EgBasedLib::ComputeFmaskAddrFromCoordMacroTiled
-*
-* @brief
-* Computes the FMASK address and bit position from a coordinate for 2D tilied (macro
-* tiled)
-* @return
-* The byte address
-****************************************************************************************************
-*/
-UINT_64 EgBasedLib::ComputeFmaskAddrFromCoordMacroTiled(
- UINT_32 x, ///< [in] x coordinate
- UINT_32 y, ///< [in] y coordinate
- UINT_32 slice, ///< [in] slice index
- UINT_32 sample, ///< [in] sample number
- UINT_32 plane, ///< [in] plane number
- UINT_32 pitch, ///< [in] surface pitch in pixels
- UINT_32 height, ///< [in] surface height in pixels
- UINT_32 numSamples, ///< [in] number of samples
- AddrTileMode tileMode, ///< [in] tile mode
- UINT_32 pipeSwizzle, ///< [in] pipe swizzle
- UINT_32 bankSwizzle, ///< [in] bank swizzle
- BOOL_32 ignoreSE, ///< [in] TRUE if ignore shader engine
- ADDR_TILEINFO* pTileInfo, ///< [in] bank structure.**All fields to be valid on entry**
- BOOL_32 resolved, ///< [in] TRUE if this is for resolved fmask
- UINT_32* pBitPosition ///< [out] pointer to returned bit position
- ) const
-{
- UINT_64 addr = 0;
- UINT_32 effectiveBpp;
- UINT_32 effectiveSamples;
-
- //
- // 2xAA use the same layout as 4xAA
- //
- if (numSamples == 2)
- {
- numSamples = 4;
- }
-
- //
- // Compute the number of planes.
- //
- if (resolved == FALSE)
- {
- effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
- effectiveBpp = numSamples;
-
- //
- // Compute the address just like a color surface with numSamples bits per element and
- // numPlanes samples.
- //
- addr = ComputeSurfaceAddrFromCoordMacroTiled(x,
- y,
- slice,
- plane, // sample
- effectiveBpp,
- pitch,
- height,
- effectiveSamples,
- tileMode,
- ADDR_NON_DISPLAYABLE,// isdisp
- ignoreSE,// ignore_shader
- FALSE,// depth_sample_order
- pipeSwizzle,
- bankSwizzle,
- pTileInfo,
- pBitPosition);
-
- //
- // Compute the real bit position. Each (sample, plane) is stored with one bit per sample.
- //
-
-
- //
- // Compute the pixel index with in the micro tile
- //
- UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x ,
- y ,
- slice,
- effectiveBpp,
- tileMode,
- ADDR_NON_DISPLAYABLE);
-
- *pBitPosition = ((pixelIndex * numSamples) + sample) & (BITS_PER_BYTE-1);
-
- UINT_64 bitAddr = BYTES_TO_BITS(addr) + *pBitPosition;
-
- addr = bitAddr / 8;
-
- }
- else
- {
- effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
- effectiveSamples = 1;
-
- //
- // Compute the address just like a color surface with numSamples bits per element and
- // numPlanes samples.
- //
- addr = ComputeSurfaceAddrFromCoordMacroTiled(x,
- y,
- slice,
- sample,
- effectiveBpp,
- pitch,
- height,
- effectiveSamples,
- tileMode,
- ADDR_NON_DISPLAYABLE,
- ignoreSE,
- TRUE,
- pipeSwizzle,
- bankSwizzle,
- pTileInfo,
- pBitPosition);
- }
-
- return addr;
-}
-
-/**
-****************************************************************************************************
-* EgBasedLib::ComputeFmaskCoordFromAddrMicroTiled
-*
-* @brief
-* Compute (x,y,slice,sample,plane) coordinates from fmask address
-* @return
-* N/A
-*
-****************************************************************************************************
-*/
-VOID EgBasedLib::ComputeFmaskCoordFromAddrMicroTiled(
- UINT_64 addr, ///< [in] byte address
- UINT_32 bitPosition,///< [in] bit position
- UINT_32 pitch, ///< [in] pitch in pixels
- UINT_32 height, ///< [in] height in pixels
- UINT_32 numSamples, ///< [in] number of samples (of color buffer)
- AddrTileMode tileMode, ///< [in] tile mode
- BOOL_32 resolved, ///< [in] TRUE if it is resolved fmask
- UINT_32* pX, ///< [out] X coord
- UINT_32* pY, ///< [out] Y coord
- UINT_32* pSlice, ///< [out] slice index
- UINT_32* pSample, ///< [out] sample index
- UINT_32* pPlane ///< [out] plane index
- ) const
-{
- UINT_32 effectiveBpp;
- UINT_32 effectiveSamples;
-
- // 2xAA use the same layout as 4xAA
- if (numSamples == 2)
- {
- numSamples = 4;
- }
-
- if (resolved == FALSE)
- {
- effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
- effectiveBpp = numSamples;
-
- ComputeSurfaceCoordFromAddrMicroTiled(addr,
- bitPosition,
- effectiveBpp,
- pitch,
- height,
- effectiveSamples,
- tileMode,
- 0, // tileBase
- 0, // compBits
- pX,
- pY,
- pSlice,
- pPlane,
- ADDR_NON_DISPLAYABLE, // microTileType
- FALSE // isDepthSampleOrder
- );
-
-
- if ( pSample )
- {
- *pSample = bitPosition % numSamples;
- }
- }
- else
- {
- effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
- effectiveSamples = 1;
-
- ComputeSurfaceCoordFromAddrMicroTiled(addr,
- bitPosition,
- effectiveBpp,
- pitch,
- height,
- effectiveSamples,
- tileMode,
- 0, // tileBase
- 0, // compBits
- pX,
- pY,
- pSlice,
- pSample,
- ADDR_NON_DISPLAYABLE, // microTileType
- TRUE // isDepthSampleOrder
- );
- }
-}
-
-/**
-****************************************************************************************************
-* EgBasedLib::ComputeFmaskCoordFromAddrMacroTiled
-*
-* @brief
-* Compute (x,y,slice,sample,plane) coordinates from
-* fmask address
-* @return
-* N/A
-*
-****************************************************************************************************
-*/
-VOID EgBasedLib::ComputeFmaskCoordFromAddrMacroTiled(
- UINT_64 addr, ///< [in] byte address
- UINT_32 bitPosition,///< [in] bit position
- UINT_32 pitch, ///< [in] pitch in pixels
- UINT_32 height, ///< [in] height in pixels
- UINT_32 numSamples, ///< [in] number of samples (of color buffer)
- AddrTileMode tileMode, ///< [in] tile mode
- UINT_32 pipeSwizzle,///< [in] pipe swizzle
- UINT_32 bankSwizzle,///< [in] bank swizzle
- BOOL_32 ignoreSE, ///< [in] TRUE if ignore shader engine
- ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. **All fields to be valid on entry**
- BOOL_32 resolved, ///< [in] TRUE if it is resolved fmask
- UINT_32* pX, ///< [out] X coord
- UINT_32* pY, ///< [out] Y coord
- UINT_32* pSlice, ///< [out] slice index
- UINT_32* pSample, ///< [out] sample index
- UINT_32* pPlane ///< [out] plane index
- ) const
-{
- UINT_32 effectiveBpp;
- UINT_32 effectiveSamples;
-
- // 2xAA use the same layout as 4xAA
- if (numSamples == 2)
- {
- numSamples = 4;
- }
-
- //
- // Compute the number of planes.
- //
- if (resolved == FALSE)
- {
- effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
- effectiveBpp = numSamples;
-
- ComputeSurfaceCoordFromAddrMacroTiled(addr,
- bitPosition,
- effectiveBpp,
- pitch,
- height,
- effectiveSamples,
- tileMode,
- 0, // No tileBase
- 0, // No compBits
- ADDR_NON_DISPLAYABLE,
- ignoreSE,
- FALSE,
- pipeSwizzle,
- bankSwizzle,
- pTileInfo,
- pX,
- pY,
- pSlice,
- pPlane);
-
- if (pSample)
- {
- *pSample = bitPosition % numSamples;
- }
- }
- else
- {
- effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
- effectiveSamples = 1;
-
- ComputeSurfaceCoordFromAddrMacroTiled(addr,
- bitPosition,
- effectiveBpp,
- pitch,
- height,
- effectiveSamples,
- tileMode,
- 0, // No tileBase
- 0, // No compBits
- ADDR_NON_DISPLAYABLE,
- ignoreSE,
- TRUE,
- pipeSwizzle,
- bankSwizzle,
- pTileInfo,
- pX,
- pY,
- pSlice,
- pSample);
- }
-}
-
-/**
-****************************************************************************************************
-* EgBasedLib::DispatchComputeFmaskCoordFromAddr
-*
-* @brief
-* Compute (x,y,slice,sample,plane) coordinates from
-* fmask address
-* @return
-* N/A
-*
-****************************************************************************************************
-*/
-VOID EgBasedLib::DispatchComputeFmaskCoordFromAddr(
- const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
- ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure
- ) const
-{
- UINT_64 addr = pIn->addr;
- UINT_32 bitPosition = pIn->bitPosition;
- UINT_32 pitch = pIn->pitch;
- UINT_32 height = pIn->height;
- UINT_32 numSamples = pIn->numSamples;
- AddrTileMode tileMode = pIn->tileMode;
- BOOL_32 ignoreSE = pIn->ignoreSE;
- ADDR_TILEINFO* pTileInfo = pIn->pTileInfo;
- BOOL_32 resolved = pIn->resolved;
-
- UINT_32* pX = &pOut->x;
- UINT_32* pY = &pOut->y;
- UINT_32* pSlice = &pOut->slice;
- UINT_32* pSample = &pOut->sample;
- UINT_32* pPlane = &pOut->plane;
-
- switch (tileMode)
- {
- case ADDR_TM_1D_TILED_THIN1:
- ComputeFmaskCoordFromAddrMicroTiled(addr,
- bitPosition,
- pitch,
- height,
- numSamples,
- tileMode,
- resolved,
- pX,
- pY,
- pSlice,
- pSample,
- pPlane);
- break;
- case ADDR_TM_2D_TILED_THIN1://fall through
- case ADDR_TM_3D_TILED_THIN1:
- UINT_32 pipeSwizzle;
- UINT_32 bankSwizzle;
-
- if (m_configFlags.useCombinedSwizzle)
- {
- ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
- &bankSwizzle, &pipeSwizzle);
- }
- else
- {
- pipeSwizzle = pIn->pipeSwizzle;
- bankSwizzle = pIn->bankSwizzle;
- }
-
- ComputeFmaskCoordFromAddrMacroTiled(addr,
- bitPosition,
- pitch,
- height,
- numSamples,
- tileMode,
- pipeSwizzle,
- bankSwizzle,
- ignoreSE,
- pTileInfo,
- resolved,
- pX,
- pY,
- pSlice,
- pSample,
- pPlane);
- break;
- default:
- ADDR_ASSERT_ALWAYS();
- break;
-
- }
-}
-#endif
-
/**
****************************************************************************************************
* EgBasedLib::ComputeFmaskNumPlanesFromNumSamples
diff --git a/src/amd/addrlib/r800/siaddrlib.cpp b/src/amd/addrlib/r800/siaddrlib.cpp
index 9ee1335b3ae..0fb5c2befdc 100644
--- a/src/amd/addrlib/r800/siaddrlib.cpp
+++ b/src/amd/addrlib/r800/siaddrlib.cpp
@@ -32,16 +32,9 @@
*/
#include "siaddrlib.h"
-
#include "si_gb_reg.h"
-#include "si_ci_vi_merged_enum.h"
-
-#if BRAHMA_BUILD
-#include "amdgpu_id.h"
-#else
-#include "si_id.h"
-#endif
+#include "amdgpu_asic_addr.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2664,7 +2657,8 @@ ADDR_E_RETURNCODE SiLib::HwlComputeSurfaceInfo(
if ((pIn->numSlices > 1) &&
(IsMacroTiled(pOut->tileMode) == TRUE) &&
- (m_chipFamily == ADDR_CHIP_FAMILY_SI))
+ ((m_chipFamily == ADDR_CHIP_FAMILY_SI) ||
+ (IsPrtTileMode(pOut->tileMode) == FALSE)))
{
pOut->equationIndex = ADDR_INVALID_EQUATION_INDEX;
}
@@ -2822,8 +2816,8 @@ VOID SiLib::HwlCheckLastMacroTiledLvl(
****************************************************************************************************
*/
AddrTileMode SiLib::HwlDegradeThickTileMode(
- AddrTileMode baseTileMode, ///< [in] base tile mode
- UINT_32 numSlices, ///< [in] current number of slices
+ AddrTileMode baseTileMode, ///< base tile mode
+ UINT_32 numSlices, ///< current number of slices
UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice
) const
{
@@ -2963,9 +2957,9 @@ INT_32 SiLib::HwlPostCheckTileIndex(
****************************************************************************************************
*/
ADDR_E_RETURNCODE SiLib::HwlSetupTileCfg(
- UINT_32 bpp, ///< [in] Bits per pixel
- INT_32 index, ///< [in] Tile index
- INT_32 macroModeIndex, ///< [in] Index in macro tile mode table(CI)
+ UINT_32 bpp, ///< Bits per pixel
+ INT_32 index, ///< Tile index
+ INT_32 macroModeIndex, ///< Index in macro tile mode table(CI)
ADDR_TILEINFO* pInfo, ///< [out] Tile Info
AddrTileMode* pMode, ///< [out] Tile mode
AddrTileType* pType ///< [out] Tile type
diff --git a/src/amd/addrlib/r800/siaddrlib.h b/src/amd/addrlib/r800/siaddrlib.h
index faf63fde6c5..f07fc31a57d 100644
--- a/src/amd/addrlib/r800/siaddrlib.h
+++ b/src/amd/addrlib/r800/siaddrlib.h
@@ -59,18 +59,36 @@ struct TileConfig
* @brief SI specific settings structure.
****************************************************************************************************
*/
-struct SIChipSettings
+struct SiChipSettings
{
- struct
- {
- UINT_32 isSouthernIsland : 1;
- UINT_32 isTahiti : 1;
- UINT_32 isPitCairn : 1;
- UINT_32 isCapeVerde : 1;
- /// Oland/Hainan are of GFXIP 6.0, similar with SI
- UINT_32 isOland : 1;
- UINT_32 isHainan : 1;
- };
+ UINT_32 isSouthernIsland : 1;
+ UINT_32 isTahiti : 1;
+ UINT_32 isPitCairn : 1;
+ UINT_32 isCapeVerde : 1;
+ // Oland/Hainan are of GFXIP 6.0, similar with SI
+ UINT_32 isOland : 1;
+ UINT_32 isHainan : 1;
+
+ // CI
+ UINT_32 isSeaIsland : 1;
+ UINT_32 isBonaire : 1;
+ UINT_32 isKaveri : 1;
+ UINT_32 isSpectre : 1;
+ UINT_32 isSpooky : 1;
+ UINT_32 isKalindi : 1;
+ // Hawaii is GFXIP 7.2
+ UINT_32 isHawaii : 1;
+
+ // VI
+ UINT_32 isVolcanicIslands : 1;
+ UINT_32 isIceland : 1;
+ UINT_32 isTonga : 1;
+ UINT_32 isFiji : 1;
+ UINT_32 isPolaris10 : 1;
+ UINT_32 isPolaris11 : 1;
+ UINT_32 isPolaris12 : 1;
+ // VI fusion
+ UINT_32 isCarrizo : 1;
};
/**
@@ -312,12 +330,12 @@ protected:
UINT_32 m_uncompressDepthEqIndex;
+ SiChipSettings m_settings;
+
private:
VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const;
BOOL_32 InitTileSettingTable(const UINT_32 *pSetting, UINT_32 noOfEntries);
-
- SIChipSettings m_settings;
};
} // V1