diff options
Diffstat (limited to 'src/amd/addrlib')
25 files changed, 1324 insertions, 3104 deletions
diff --git a/src/amd/addrlib/addrinterface.cpp b/src/amd/addrlib/addrinterface.cpp index 638556bd893..5fdf7fc3c65 100644 --- a/src/amd/addrlib/addrinterface.cpp +++ b/src/amd/addrlib/addrinterface.cpp @@ -534,11 +534,11 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( if (pLib != NULL) { - returnCode = pLib->ComputeDccInfo(pIn, pOut); + returnCode = pLib->ComputeDccInfo(pIn, pOut); } else { - returnCode = ADDR_ERROR; + returnCode = ADDR_ERROR; } return returnCode; diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h index f0da083af5d..8124b745f21 100644 --- a/src/amd/addrlib/addrinterface.h +++ b/src/amd/addrlib/addrinterface.h @@ -528,7 +528,7 @@ typedef union _ADDR_SURFACE_FLAGS UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode UINT_32 matchStencilTileCfg : 1; ///< Select tile index of stencil as well as depth surface /// to make sure they share same tile config parameters - UINT_32 reserved : 3; ///< Reserved bits + UINT_32 reserved : 2; ///< Reserved bits }; UINT_32 value; @@ -714,12 +714,6 @@ typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT }; UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE }; - -#if ADDR_AM_BUILD // These two fields are not valid in SW blt since no HTILE access - UINT_32 addr5Swizzle; ///< ADDR5_SWIZZLE_MASK of DB_DEPTH_INFO - BOOL_32 is32ByteTile; ///< Caller must have access to HTILE buffer and know if - /// this tile is compressed to 32B -#endif } ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT; /** @@ -857,8 +851,11 @@ typedef union _ADDR_HTILE_FLAGS { struct { - UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable - UINT_32 reserved :31; ///< Reserved bits + UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable + UINT_32 skipTcCompatSizeAlign : 1; ///< Flag indicates that addrLib will not align htile + /// size to 256xBankxPipe when computing tc-compatible + /// htile info. + UINT_32 reserved : 30; ///< Reserved bits }; UINT_32 value; @@ -915,6 +912,9 @@ typedef struct _ADDR_COMPUTE_HTILE_INFO_OUTPUT UINT_64 sliceSize; ///< Slice size, in bytes. BOOL_32 sliceInterleaved; ///< Flag to indicate if different slice's htile is interleaved /// Compute engine clear can't be used if htile is interleaved + BOOL_32 nextMipLevelCompressible; ///< Flag to indicate whether HTILE can be enabled in + /// next mip level, it also indicates if memory set based + /// fast clear can be used for current mip level. } ADDR_COMPUTE_HTILE_INFO_OUTPUT; /** @@ -2188,7 +2188,6 @@ ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex( - /** **************************************************************************************************** * ADDR_PRT_INFO_INPUT @@ -2233,6 +2232,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( const ADDR_PRT_INFO_INPUT* pIn, ADDR_PRT_INFO_OUTPUT* pOut); + + //////////////////////////////////////////////////////////////////////////////////////////////////// // DCC key functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -2293,6 +2294,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ADDR_COMPUTE_DCCINFO_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_GET_MAX_ALIGNMENTS_OUTPUT @@ -2693,10 +2696,8 @@ typedef struct _ADDR2_META_MIP_INFO struct { - UINT_32 offset; ///< metadata offset within one slice, - /// the thickness of a slice is meta block depth. - UINT_32 sliceSize; ///< metadata size within one slice, - /// the thickness of a slice is meta block depth. + UINT_32 offset; + UINT_32 sliceSize; }; }; } ADDR2_META_MIP_INFO; @@ -2720,9 +2721,7 @@ typedef struct _ADDR2_COMPUTE_HTILE_INFO_INPUT UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) UINT_32 numSlices; ///< Number of slices of depth surface (of mip0) UINT_32 numMipLevels; ///< Total mipmap levels of color surface - UINT_32 firstMipIdInTail; ///< id of the first mip in tail, - /// if no mip is in tail, it should be set to - /// number of mip levels + UINT_32 firstMipIdInTail; } ADDR2_COMPUTE_HTILE_INFO_INPUT; /** @@ -3308,8 +3307,7 @@ typedef struct _ADDR2_COMPUTE_DCCINFO_INPUT UINT_32 numMipLevels; ///< Total mipmap levels of color surface UINT_32 dataSurfaceSize; ///< The padded size of all slices and mip levels ///< useful in meta linear case - UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if no mip is in tail, - /// it should be number of mip levels + UINT_32 firstMipIdInTail; } ADDR2_COMPUTE_DCCINFO_INPUT; /** @@ -3339,8 +3337,13 @@ typedef struct _ADDR2_COMPUTE_DCCINFO_OUTPUT UINT_32 metaBlkHeight; ///< DCC meta block height UINT_32 metaBlkDepth; ///< DCC meta block depth - UINT_32 fastClearSizePerSlice; ///< Size of DCC within a slice should be fast cleared - UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice + UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice + + union + { + UINT_32 fastClearSizePerSlice; ///< Size of DCC within a slice should be fast cleared + UINT_32 dccRamSliceSize; + }; ADDR2_META_MIP_INFO* pMipInfo; ///< DCC mip information } ADDR2_COMPUTE_DCCINFO_OUTPUT; @@ -3571,7 +3574,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( * ADDR2_BLOCK_SET * * @brief -* Bit field that define block type +* Bit field that defines block type **************************************************************************************************** */ typedef union _ADDR2_BLOCK_SET @@ -3591,6 +3594,28 @@ typedef union _ADDR2_BLOCK_SET /** **************************************************************************************************** +* ADDR2_SWTYPE_SET +* +* @brief +* Bit field that defines swizzle type +**************************************************************************************************** +*/ +typedef union _ADDR2_SWTYPE_SET +{ + struct + { + UINT_32 sw_Z : 1; // SW_*_Z_* + UINT_32 sw_S : 1; // SW_*_S_* + UINT_32 sw_D : 1; // SW_*_D_* + UINT_32 sw_R : 1; // SW_*_R_* + UINT_32 reserved : 28; + }; + + UINT_32 value; +} ADDR2_SWTYPE_SET; + +/** +**************************************************************************************************** * ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * * @brief @@ -3607,6 +3632,7 @@ typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT AddrResrouceLocation resourceLoction; ///< Surface heap choice ADDR2_BLOCK_SET forbiddenBlock; ///< Client can use it to disable some block setting ///< such as linear for DXTn, tiled for YUV + ADDR2_SWTYPE_SET preferredSwSet; ///< Client can use it to specify sw type(s) wanted BOOL_32 noXor; ///< Do not use xor mode for this resource UINT_32 bpp; ///< bits per pixel UINT_32 width; ///< Width (of mip0), in pixels @@ -3632,12 +3658,15 @@ typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT */ typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT { - UINT_32 size; ///< Size of this structure in bytes + UINT_32 size; ///< Size of this structure in bytes - AddrSwizzleMode swizzleMode; ///< Suggested swizzle mode to be used - AddrResourceType resourceType; ///< Suggested resource type to program HW - ADDR2_BLOCK_SET validBlockSet; ///< Valid block type bit conbination - BOOL_32 canXor; ///< If client can use xor on a valid macro block type + AddrSwizzleMode swizzleMode; ///< Suggested swizzle mode to be used + AddrResourceType resourceType; ///< Suggested resource type to program HW + ADDR2_BLOCK_SET validBlockSet; ///< Valid block type bit conbination + BOOL_32 canXor; ///< If client can use xor on a valid macro block + /// type + ADDR2_SWTYPE_SET validSwTypeSet; ///< Valid swizzle type bit combination + ADDR2_SWTYPE_SET clientPreferredSwSet; ///< Client-preferred swizzle type bit combination } ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT; /** diff --git a/src/amd/addrlib/amdgpu_asic_addr.h b/src/amd/addrlib/amdgpu_asic_addr.h new file mode 100644 index 00000000000..ea957a88b4d --- /dev/null +++ b/src/amd/addrlib/amdgpu_asic_addr.h @@ -0,0 +1,129 @@ +/* + * Copyright © 2017 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +#ifndef _AMDGPU_ASIC_ADDR_H +#define _AMDGPU_ASIC_ADDR_H + +#define ATI_VENDOR_ID 0x1002 +#define AMD_VENDOR_ID 0x1022 + +// AMDGPU_VENDOR_IS_AMD(vendorId) +#define AMDGPU_VENDOR_IS_AMD(v) ((v == ATI_VENDOR_ID) || (v == AMD_VENDOR_ID)) + +#define FAMILY_UNKNOWN 0x00 +#define FAMILY_TN 0x69 +#define FAMILY_SI 0x6E +#define FAMILY_CI 0x78 +#define FAMILY_KV 0x7D +#define FAMILY_VI 0x82 +#define FAMILY_POLARIS 0x82 +#define FAMILY_CZ 0x87 +#define FAMILY_AI 0x8D +#define FAMILY_RV 0x8E + +// AMDGPU_FAMILY_IS(familyId, familyName) +#define FAMILY_IS(f, fn) (f == FAMILY_##fn) +#define FAMILY_IS_TN(f) FAMILY_IS(f, TN) +#define FAMILY_IS_SI(f) FAMILY_IS(f, SI) +#define FAMILY_IS_CI(f) FAMILY_IS(f, CI) +#define FAMILY_IS_KV(f) FAMILY_IS(f, KV) +#define FAMILY_IS_VI(f) FAMILY_IS(f, VI) +#define FAMILY_IS_POLARIS(f) FAMILY_IS(f, POLARIS) +#define FAMILY_IS_CZ(f) FAMILY_IS(f, CZ) +#define FAMILY_IS_AI(f) FAMILY_IS(f, AI) +#define FAMILY_IS_RV(f) FAMILY_IS(f, RV) + +#define AMDGPU_UNKNOWN 0xFF + +#define AMDGPU_TAHITI_RANGE 0x05, 0x14 +#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28 +#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C +#define AMDGPU_OLAND_RANGE 0x3C, 0x46 +#define AMDGPU_HAINAN_RANGE 0x46, 0xFF + +#define AMDGPU_BONAIRE_RANGE 0x14, 0x28 +#define AMDGPU_HAWAII_RANGE 0x28, 0x3C + +#define AMDGPU_SPECTRE_RANGE 0x01, 0x41 +#define AMDGPU_SPOOKY_RANGE 0x41, 0x81 +#define AMDGPU_KALINDI_RANGE 0x81, 0xA1 +#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF + +#define AMDGPU_ICELAND_RANGE 0x01, 0x14 +#define AMDGPU_TONGA_RANGE 0x14, 0x28 +#define AMDGPU_FIJI_RANGE 0x3C, 0x50 + +#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A +#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64 +#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E + +#define AMDGPU_CARRIZO_RANGE 0x01, 0x21 +#define AMDGPU_BRISTOL_RANGE 0x10, 0x21 +#define AMDGPU_STONEY_RANGE 0x61, 0xFF + +#define AMDGPU_VEGA10_RANGE 0x01, 0x14 + +#define AMDGPU_RAVEN_RANGE 0x01, 0x81 + +#define AMDGPU_EXPAND_FIX(x) x +#define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max)) +#define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__)) + + +// ASICREV_IS(eRevisionId, revisionName) +#define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE) +#define ASICREV_IS_TAHITI_P(r) ASICREV_IS(r, TAHITI) +#define ASICREV_IS_PITCAIRN_PM(r) ASICREV_IS(r, PITCAIRN) +#define ASICREV_IS_CAPEVERDE_M(r) ASICREV_IS(r, CAPEVERDE) +#define ASICREV_IS_OLAND_M(r) ASICREV_IS(r, OLAND) +#define ASICREV_IS_HAINAN_V(r) ASICREV_IS(r, HAINAN) + +#define ASICREV_IS_BONAIRE_M(r) ASICREV_IS(r, BONAIRE) +#define ASICREV_IS_HAWAII_P(r) ASICREV_IS(r, HAWAII) + +#define ASICREV_IS_SPECTRE(r) ASICREV_IS(r, SPECTRE) +#define ASICREV_IS_SPOOKY(r) ASICREV_IS(r, SPOOKY) +#define ASICREV_IS_KALINDI(r) ASICREV_IS(r, KALINDI) +#define ASICREV_IS_KALINDI_GODAVARI(r) ASICREV_IS(r, GODAVARI) + +#define ASICREV_IS_ICELAND_M(r) ASICREV_IS(r, ICELAND) +#define ASICREV_IS_TONGA_P(r) ASICREV_IS(r, TONGA) +#define ASICREV_IS_FIJI_P(r) ASICREV_IS(r, FIJI) + +#define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10) +#define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11) +#define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12) + +#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO) +#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL) +#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY) + +#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10) +#define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10) + +#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN) + +#endif // _AMDGPU_ASIC_ADDR_H diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h index 8f5f1bfb374..62f8ac61618 100644 --- a/src/amd/addrlib/core/addrcommon.h +++ b/src/amd/addrlib/core/addrcommon.h @@ -36,15 +36,9 @@ #include "addrinterface.h" -// ADDR_LNX_KERNEL_BUILD is for internal build -// Moved from addrinterface.h so __KERNEL__ is not needed any more -#if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__)) - #include "lnx_common_defs.h" // ported from cmmqs -#elif !defined(__APPLE__) || defined(HAVE_TSERVER) - #include <assert.h> - #include <stdlib.h> - #include <string.h> -#endif +#include <stdlib.h> +#include <string.h> +#include <assert.h> #if BRAHMA_BUILD && !defined(DEBUG) #ifdef NDEBUG @@ -171,6 +165,8 @@ #endif // DEBUG //////////////////////////////////////////////////////////////////////////////////////////////////// +#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1] + namespace Addr { diff --git a/src/amd/addrlib/core/addrelemlib.cpp b/src/amd/addrlib/core/addrelemlib.cpp index 4bc46e0f585..c9e6da4729a 100644 --- a/src/amd/addrlib/core/addrelemlib.cpp +++ b/src/amd/addrlib/core/addrelemlib.cpp @@ -1271,6 +1271,9 @@ VOID ElemLib::RestoreSurfaceInfo( UINT_32 height; UINT_32 bpp; + BOOL_32 bBCnFormat = FALSE; + (void)bBCnFormat; + ADDR_ASSERT(pBpp != NULL); ADDR_ASSERT(pWidth != NULL && pHeight != NULL); @@ -1289,22 +1292,17 @@ VOID ElemLib::RestoreSurfaceInfo( break; case ADDR_PACKED_GBGR: case ADDR_PACKED_BGRG: - if (m_pAddrLib->GetChipFamily() >= ADDR_CHIP_FAMILY_AI) - { - originalBits = bpp / expandX; - } - else - { - originalBits = bpp; // 32-bit packed ==> 2 32-bit result - } + originalBits = bpp; // 32-bit packed ==> 2 32-bit result break; case ADDR_PACKED_BC1: // Fall through case ADDR_PACKED_BC4: originalBits = 64; + bBCnFormat = TRUE; break; case ADDR_PACKED_BC2: // Fall through case ADDR_PACKED_BC3: // Fall through case ADDR_PACKED_BC5: + bBCnFormat = TRUE; // fall through case ADDR_PACKED_ASTC: case ADDR_PACKED_ETC2_128BPP: @@ -1394,27 +1392,11 @@ UINT_32 ElemLib::GetBitsPerPixel( break; case ADDR_FMT_GB_GR: // treat as FMT_8_8 elemMode = ADDR_PACKED_GBGR; - if (m_pAddrLib->GetChipFamily() >= ADDR_CHIP_FAMILY_AI) - { - bpp = 32; - expandX = 2; - } - else - { - bpp = 16; - } + bpp = 16; break; case ADDR_FMT_BG_RG: // treat as FMT_8_8 elemMode = ADDR_PACKED_BGRG; - if (m_pAddrLib->GetChipFamily() >= ADDR_CHIP_FAMILY_AI) - { - bpp = 32; - expandX = 2; - } - else - { - bpp = 16; - } + bpp = 16; break; case ADDR_FMT_8_8_8_8: case ADDR_FMT_2_10_10_10: diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp index 65fd3451a0d..a6ac5ecf836 100644 --- a/src/amd/addrlib/core/addrlib.cpp +++ b/src/amd/addrlib/core/addrlib.cpp @@ -218,7 +218,16 @@ ADDR_E_RETURNCODE Lib::Create( } break; case CIASICIDGFXENGINE_ARCTICISLAND: - pLib = Gfx9HwlInit(&client); + switch (pCreateIn->chipFamily) + { + case FAMILY_AI: + case FAMILY_RV: + pLib = Gfx9HwlInit(&client); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } break; default: ADDR_ASSERT_ALWAYS(); diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h index 20700844272..8db65a61c87 100644 --- a/src/amd/addrlib/core/addrlib.h +++ b/src/amd/addrlib/core/addrlib.h @@ -38,11 +38,7 @@ #include "addrobject.h" #include "addrelemlib.h" -#if BRAHMA_BUILD -#include "amdgpu_id.h" -#else -#include "atiid.h" -#endif +#include "amdgpu_asic_addr.h" #ifndef CIASICIDGFXENGINE_R600 #define CIASICIDGFXENGINE_R600 0x00000006 @@ -128,6 +124,123 @@ enum BankSwapSize /** **************************************************************************************************** +* @brief Enums that define max compressed fragments config +**************************************************************************************************** +*/ +enum NumMaxCompressedFragmentsConfig +{ + ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS = 0x00000000, + ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS = 0x00000001, + ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS = 0x00000002, + ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS = 0x00000003, +}; + +/** +**************************************************************************************************** +* @brief Enums that define num pipes config +**************************************************************************************************** +*/ +enum NumPipesConfig +{ + ADDR_CONFIG_1_PIPE = 0x00000000, + ADDR_CONFIG_2_PIPE = 0x00000001, + ADDR_CONFIG_4_PIPE = 0x00000002, + ADDR_CONFIG_8_PIPE = 0x00000003, + ADDR_CONFIG_16_PIPE = 0x00000004, + ADDR_CONFIG_32_PIPE = 0x00000005, + ADDR_CONFIG_64_PIPE = 0x00000006, +}; + +/** +**************************************************************************************************** +* @brief Enums that define num banks config +**************************************************************************************************** +*/ +enum NumBanksConfig +{ + ADDR_CONFIG_1_BANK = 0x00000000, + ADDR_CONFIG_2_BANK = 0x00000001, + ADDR_CONFIG_4_BANK = 0x00000002, + ADDR_CONFIG_8_BANK = 0x00000003, + ADDR_CONFIG_16_BANK = 0x00000004, +}; + +/** +**************************************************************************************************** +* @brief Enums that define num rb per shader engine config +**************************************************************************************************** +*/ +enum NumRbPerShaderEngineConfig +{ + ADDR_CONFIG_1_RB_PER_SHADER_ENGINE = 0x00000000, + ADDR_CONFIG_2_RB_PER_SHADER_ENGINE = 0x00000001, + ADDR_CONFIG_4_RB_PER_SHADER_ENGINE = 0x00000002, +}; + +/** +**************************************************************************************************** +* @brief Enums that define num shader engines config +**************************************************************************************************** +*/ +enum NumShaderEnginesConfig +{ + ADDR_CONFIG_1_SHADER_ENGINE = 0x00000000, + ADDR_CONFIG_2_SHADER_ENGINE = 0x00000001, + ADDR_CONFIG_4_SHADER_ENGINE = 0x00000002, + ADDR_CONFIG_8_SHADER_ENGINE = 0x00000003, +}; + +/** +**************************************************************************************************** +* @brief Enums that define pipe interleave size config +**************************************************************************************************** +*/ +enum PipeInterleaveSizeConfig +{ + ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x00000000, + ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x00000001, + ADDR_CONFIG_PIPE_INTERLEAVE_1KB = 0x00000002, + ADDR_CONFIG_PIPE_INTERLEAVE_2KB = 0x00000003, +}; + +/** +**************************************************************************************************** +* @brief Enums that define row size config +**************************************************************************************************** +*/ +enum RowSizeConfig +{ + ADDR_CONFIG_1KB_ROW = 0x00000000, + ADDR_CONFIG_2KB_ROW = 0x00000001, + ADDR_CONFIG_4KB_ROW = 0x00000002, +}; + +/** +**************************************************************************************************** +* @brief Enums that define bank interleave size config +**************************************************************************************************** +*/ +enum BankInterleaveSizeConfig +{ + ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x00000000, + ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x00000001, + ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x00000002, + ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x00000003, +}; + +/** +**************************************************************************************************** +* @brief Enums that define engine tile size config +**************************************************************************************************** +*/ +enum ShaderEngineTileSizeConfig +{ + ADDR_CONFIG_SE_TILE_16 = 0x00000000, + ADDR_CONFIG_SE_TILE_32 = 0x00000001, +}; + +/** +**************************************************************************************************** * @brief This class contains asic independent address lib functionalities **************************************************************************************************** */ diff --git a/src/amd/addrlib/core/addrlib1.cpp b/src/amd/addrlib/core/addrlib1.cpp index 548b24b7b69..c796a63436c 100644 --- a/src/amd/addrlib/core/addrlib1.cpp +++ b/src/amd/addrlib/core/addrlib1.cpp @@ -1281,36 +1281,54 @@ ADDR_E_RETURNCODE Lib::ComputeHtileInfo( if (returnCode == ADDR_OK) { - pOut->bpp = ComputeHtileInfo(pIn->flags, - pIn->pitch, - pIn->height, - pIn->numSlices, - pIn->isLinear, - isWidth8, - isHeight8, - pIn->pTileInfo, - &pOut->pitch, - &pOut->height, - &pOut->htileBytes, - &pOut->macroWidth, - &pOut->macroHeight, - &pOut->sliceSize, - &pOut->baseAlign); - - if (pIn->flags.tcCompatible && (pIn->numSlices > 1)) + if (pIn->flags.tcCompatible) { - pOut->sliceSize = pIn->pitch * pIn->height * 4 / (8 * 8); - - const UINT_32 align = HwlGetPipes(pIn->pTileInfo) * pIn->pTileInfo->banks * m_pipeInterleaveBytes; + const UINT_32 sliceSize = pIn->pitch * pIn->height * 4 / (8 * 8); + const UINT_32 align = HwlGetPipes(pIn->pTileInfo) * pIn->pTileInfo->banks * m_pipeInterleaveBytes; - if ((pOut->sliceSize % align) == 0) + if (pIn->numSlices > 1) { - pOut->sliceInterleaved = FALSE; + const UINT_32 surfBytes = (sliceSize * pIn->numSlices); + + pOut->sliceSize = sliceSize; + pOut->htileBytes = pIn->flags.skipTcCompatSizeAlign ? + surfBytes : PowTwoAlign(surfBytes, align); + pOut->sliceInterleaved = ((sliceSize % align) != 0) ? TRUE : FALSE; } else { - pOut->sliceInterleaved = TRUE; + pOut->sliceSize = pIn->flags.skipTcCompatSizeAlign ? + sliceSize : PowTwoAlign(sliceSize, align); + pOut->htileBytes = pOut->sliceSize; + pOut->sliceInterleaved = FALSE; } + + pOut->nextMipLevelCompressible = ((sliceSize % align) == 0) ? TRUE : FALSE; + + pOut->pitch = pIn->pitch; + pOut->height = pIn->height; + pOut->baseAlign = align; + pOut->macroWidth = 0; + pOut->macroHeight = 0; + pOut->bpp = 32; + } + else + { + pOut->bpp = ComputeHtileInfo(pIn->flags, + pIn->pitch, + pIn->height, + pIn->numSlices, + pIn->isLinear, + isWidth8, + isHeight8, + pIn->pTileInfo, + &pOut->pitch, + &pOut->height, + &pOut->htileBytes, + &pOut->macroWidth, + &pOut->macroHeight, + &pOut->sliceSize, + &pOut->baseAlign); } } } @@ -2162,6 +2180,8 @@ VOID Lib::HwlComputeXmaskCoordFromAddr( { UINT_32 pipe; UINT_32 numPipes; + UINT_32 numGroupBits; + (void)numGroupBits; UINT_32 numPipeBits; UINT_32 macroTilePitch; UINT_32 macroTileHeight; @@ -2204,6 +2224,7 @@ VOID Lib::HwlComputeXmaskCoordFromAddr( // // Compute the number of group and pipe bits. // + numGroupBits = Log2(m_pipeInterleaveBytes); numPipeBits = Log2(numPipes); UINT_32 groupBits = 8 * m_pipeInterleaveBytes; @@ -3504,6 +3525,10 @@ VOID Lib::ComputeMipLevel( ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure ) const { + // Check if HWL has handled + BOOL_32 hwlHandled = FALSE; + (void)hwlHandled; + if (ElemLib::IsBlockCompressed(pIn->format)) { if (pIn->mipLevel == 0) @@ -3517,7 +3542,7 @@ VOID Lib::ComputeMipLevel( } } - HwlComputeMipLevel(pIn); + hwlHandled = HwlComputeMipLevel(pIn); } /** diff --git a/src/amd/addrlib/core/addrlib2.cpp b/src/amd/addrlib/core/addrlib2.cpp index 57505d35af5..ddaf597f9dd 100644 --- a/src/amd/addrlib/core/addrlib2.cpp +++ b/src/amd/addrlib/core/addrlib2.cpp @@ -355,6 +355,11 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord( { returnCode = ComputeSurfaceAddrFromCoordTiled(&localIn, pOut); } + + if (returnCode == ADDR_OK) + { + pOut->prtBlockIndex = static_cast<UINT_32>(pOut->addr / (64 * 1024)); + } } return returnCode; @@ -460,8 +465,7 @@ ADDR_E_RETURNCODE Lib::ComputeHtileInfo( */ ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord( const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure { ADDR_E_RETURNCODE returnCode; @@ -492,8 +496,7 @@ ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord( */ ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr( const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure { ADDR_E_RETURNCODE returnCode; @@ -560,8 +563,7 @@ ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( */ ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord( const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure { ADDR_E_RETURNCODE returnCode; @@ -780,8 +782,7 @@ ADDR_E_RETURNCODE Lib::ComputeDccInfo( */ ADDR_E_RETURNCODE Lib::ComputeDccAddrFromCoord( const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure { ADDR_E_RETURNCODE returnCode; @@ -1047,77 +1048,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoLinear( ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure ) const { - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - UINT_32 pitch = 0; - UINT_32 actualHeight = 0; - UINT_32 elementBytes = pIn->bpp >> 3; - const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256; - - if (IsTex1d(pIn->resourceType)) - { - if (pIn->height > 1) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - const UINT_32 pitchAlignInElement = alignment / elementBytes; - pitch = PowTwoAlign(pIn->width, pitchAlignInElement); - actualHeight = pIn->numMipLevels; - - if (pIn->flags.prt == FALSE) - { - returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, - &pitch, &actualHeight); - } - - if (returnCode == ADDR_OK) - { - if (pOut->pMipInfo != NULL) - { - for (UINT_32 i = 0; i < pIn->numMipLevels; i++) - { - pOut->pMipInfo[i].offset = pitch * elementBytes * i; - pOut->pMipInfo[i].pitch = pitch; - pOut->pMipInfo[i].height = 1; - pOut->pMipInfo[i].depth = 1; - } - } - } - } - } - else - { - returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo); - } - - if ((pitch == 0) || (actualHeight == 0)) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - pOut->pitch = pitch; - pOut->height = pIn->height; - pOut->numSlices = pIn->numSlices; - pOut->mipChainPitch = pitch; - pOut->mipChainHeight = actualHeight; - pOut->mipChainSlice = pOut->numSlices; - pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE; - pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes; - pOut->surfSize = pOut->sliceSize * pOut->numSlices; - pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment; - pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 * 8 / pIn->bpp); - pOut->blockHeight = 1; - pOut->blockSlices = 1; - } - - // Post calculation validate - ADDR_ASSERT(pOut->sliceSize > 0); - - return returnCode; + return HwlComputeSurfaceInfoLinear(pIn, pOut); } /** @@ -1170,6 +1101,8 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear( { ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + ADDR2_MIP_INFO mipInfo[MaxMipLevels]; + localIn.bpp = pIn->bpp; localIn.flags = pIn->flags; localIn.width = Max(pIn->unalignedWidth, 1u); @@ -1177,32 +1110,21 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear( localIn.numSlices = Max(pIn->numSlices, 1u); localIn.numMipLevels = Max(pIn->numMipLevels, 1u); localIn.resourceType = pIn->resourceType; + if (localIn.numMipLevels <= 1) { localIn.pitchInElement = pIn->pitchInElement; } + + localOut.pMipInfo = mipInfo; + returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut); if (returnCode == ADDR_OK) { - UINT_32 elementBytes = pIn->bpp >> 3; - UINT_64 sliceOffsetInSurf = localOut.sliceSize * pIn->slice; - UINT_64 mipOffsetInSlice = 0; - UINT_64 offsetInMip = 0; - - if (IsTex1d(pIn->resourceType)) - { - offsetInMip = static_cast<UINT_64>(pIn->x) * elementBytes; - mipOffsetInSlice = static_cast<UINT_64>(pIn->mipId) * localOut.pitch * elementBytes; - } - else - { - UINT_64 mipStartHeight = SumGeo(localIn.height, pIn->mipId); - mipOffsetInSlice = static_cast<UINT_64>(mipStartHeight) * localOut.pitch * elementBytes; - offsetInMip = (pIn->y * localOut.pitch + pIn->x) * elementBytes; - } - - pOut->addr = sliceOffsetInSurf + mipOffsetInSlice + offsetInMip; + pOut->addr = (localOut.sliceSize * pIn->slice) + + mipInfo[pIn->mipId].offset + + (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3); pOut->bitPosition = 0; } else @@ -1400,73 +1322,6 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrTiled( /** ************************************************************************************************************************ -* Lib::ComputeSurfaceInfoLinear -* -* @brief -* Internal function to calculate padding for linear swizzle 2D/3D surface -* -* @return -* N/A -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceLinearPadding( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture - UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element - UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW - ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - UINT_32 elementBytes = pIn->bpp >> 3; - UINT_32 pitchAlignInElement = 0; - - if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) - { - ADDR_ASSERT(pIn->numMipLevels <= 1); - ADDR_ASSERT(pIn->numSlices <= 1); - pitchAlignInElement = 1; - } - else - { - pitchAlignInElement = (256 / elementBytes); - } - - UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement); - UINT_32 slice0PaddedHeight = pIn->height; - - returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, - &mipChainWidth, &slice0PaddedHeight); - - if (returnCode == ADDR_OK) - { - UINT_32 mipChainHeight = 0; - UINT_32 mipHeight = pIn->height; - - for (UINT_32 i = 0; i < pIn->numMipLevels; i++) - { - if (pMipInfo != NULL) - { - pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes; - pMipInfo[i].pitch = mipChainWidth; - pMipInfo[i].height = mipHeight; - pMipInfo[i].depth = 1; - } - - mipChainHeight += mipHeight; - mipHeight = RoundHalf(mipHeight); - mipHeight = Max(mipHeight, 1u); - } - - *pMipmap0PaddedWidth = mipChainWidth; - *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight; - } - - return returnCode; -} - -/** -************************************************************************************************************************ * Lib::ComputeBlockDimensionForSurf * * @brief diff --git a/src/amd/addrlib/core/addrlib2.h b/src/amd/addrlib/core/addrlib2.h index e98fddcd2d0..bea2a485a61 100644 --- a/src/amd/addrlib/core/addrlib2.h +++ b/src/amd/addrlib/core/addrlib2.h @@ -103,63 +103,63 @@ public: // For data surface ADDR_E_RETURNCODE ComputeSurfaceInfo( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; // For HTile ADDR_E_RETURNCODE ComputeHtileInfo( const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; ADDR_E_RETURNCODE ComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); ADDR_E_RETURNCODE ComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const; + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); // For CMask ADDR_E_RETURNCODE ComputeCmaskInfo( const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr( - const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; + const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; // For FMask ADDR_E_RETURNCODE ComputeFmaskInfo( - const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); + const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord( - const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr( - const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; + const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; // For DCC key ADDR_E_RETURNCODE ComputeDccInfo( const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; ADDR_E_RETURNCODE ComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const; + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); // Misc ADDR_E_RETURNCODE ComputePipeBankXor( @@ -197,6 +197,8 @@ protected: static const UINT_32 PrtAlignment = 64 * 1024; static const UINT_32 MaxMacroBits = 20; + static const UINT_32 MaxMipLevels = 16; + // Checking block size BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const { @@ -402,32 +404,32 @@ protected: } virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) { ADDR_NOT_IMPLEMENTED(); return ADDR_NOTSUPPORTED; } virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) { ADDR_NOT_IMPLEMENTED(); return ADDR_NOTSUPPORTED; } virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) { ADDR_NOT_IMPLEMENTED(); return ADDR_NOTSUPPORTED; } virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) { ADDR_NOT_IMPLEMENTED(); return ADDR_NOTSUPPORTED; @@ -532,6 +534,14 @@ protected: return ADDR_NOTIMPLEMENTED; } + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const @@ -565,12 +575,6 @@ protected: const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - ADDR_E_RETURNCODE ComputeSurfaceLinearPadding( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - UINT_32* pMipmap0PaddedWidth, - UINT_32* pSlice0PaddedHeight, - ADDR2_MIP_INFO* pMipInfo = NULL) const; - ADDR_E_RETURNCODE ComputeSurfaceInfoTiled( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; @@ -757,10 +761,10 @@ protected: ADDR_E_RETURNCODE ApplyCustomizedPitchHeight( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - UINT_32 elementBytes, - UINT_32 pitchAlignInElement, - UINT_32* pPitch, - UINT_32* pHeight) const; + UINT_32 elementBytes, + UINT_32 pitchAlignInElement, + UINT_32* pPitch, + UINT_32* pHeight) const; VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; diff --git a/src/amd/addrlib/gfx9/coord.cpp b/src/amd/addrlib/gfx9/coord.cpp index effdc90017e..228d8f1872b 100644 --- a/src/amd/addrlib/gfx9/coord.cpp +++ b/src/amd/addrlib/gfx9/coord.cpp @@ -34,20 +34,20 @@ Coordinate::Coordinate() ord = 0; } -Coordinate::Coordinate(INT_8 c, UINT_32 n) +Coordinate::Coordinate(INT_8 c, INT_32 n) { - set(c,n); + set(c, n); } -VOID Coordinate::set(INT_8 c, UINT_32 n) +VOID Coordinate::set(INT_8 c, INT_32 n) { dim = c; ord = static_cast<INT_8>(n); } -UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) +UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const { - UINT_32 bit = 1 << (UINT_32)ord; + UINT_32 bit = static_cast<UINT_32>(1ull << static_cast<UINT_32>(ord)); UINT_32 out = 0; switch (dim) @@ -234,7 +234,7 @@ UINT_32 CoordTerm::getsize() return num_coords; } -UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) +UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const { UINT_32 out = 0; for (UINT_32 i = 0; i < num_coords; i++) @@ -386,7 +386,7 @@ UINT_32 CoordEq::getsize() return m_numBits; } -UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) +UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const { UINT_64 out = 0; for (UINT_32 i = 0; i < m_numBits; i++) @@ -401,7 +401,7 @@ UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) VOID CoordEq::solveAddr( UINT_64 addr, UINT_32 sliceInM, - UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) + UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const { UINT_32 xBitsValid = 0; UINT_32 yBitsValid = 0; diff --git a/src/amd/addrlib/gfx9/coord.h b/src/amd/addrlib/gfx9/coord.h index 28c57c17fe1..4243d3069a9 100644 --- a/src/amd/addrlib/gfx9/coord.h +++ b/src/amd/addrlib/gfx9/coord.h @@ -33,12 +33,12 @@ class Coordinate { public: Coordinate(); - Coordinate(INT_8 c, UINT_32 n); + Coordinate(INT_8 c, INT_32 n); - VOID set(INT_8 c, UINT_32 n); - UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0); - INT_8 getdim(); - INT_8 getord(); + VOID set(INT_8 c, INT_32 n); + UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; + INT_8 getdim(); + INT_8 getord(); BOOL_32 operator==(const Coordinate& b); BOOL_32 operator<(const Coordinate& b); @@ -64,7 +64,7 @@ public: BOOL_32 Exists(Coordinate& co); VOID copyto(CoordTerm& cl); UINT_32 getsize(); - UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0); + UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; VOID getsmallest(Coordinate& co); UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0'); @@ -87,9 +87,9 @@ public: BOOL_32 Exists(Coordinate& co); VOID resize(UINT_32 n); UINT_32 getsize(); - virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0); + virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; virtual VOID solveAddr(UINT_64 addr, UINT_32 sliceInM, - UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m); + UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const; VOID copy(CoordEq& o, UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF); VOID reverse(UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF); diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp index edb4c6e636a..e06f13c0afe 100644 --- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp +++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp @@ -34,14 +34,8 @@ #include "gfx9addrlib.h" #include "gfx9_gb_reg.h" -#include "gfx9_enum.h" -#if BRAHMA_BUILD -#include "amdgpu_id.h" -#else -#include "ai_id.h" -#include "rv_id.h" -#endif +#include "amdgpu_asic_addr.h" //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -183,7 +177,14 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( } else { - numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; + if (m_settings.applyAliasFix) + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); + } + else + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; + } } numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; @@ -222,6 +223,11 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + if (m_settings.htileAlignFix) + { + sizeAlign <<= 1; + } + pOut->pitch = numMetaBlkX * metaBlkDim.w; pOut->height = numMetaBlkY * metaBlkDim.h; pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4; @@ -284,7 +290,14 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo( } else { - numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; + if (m_settings.applyAliasFix) + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); + } + else + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; + } numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u); } @@ -569,8 +582,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( if ((numPipeTotal > 1) || (numRbTotal > 1)) { + const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10); + numCompressBlkPerMetaBlk = - Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : 1024)); + Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize)); if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp) { @@ -685,8 +700,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments( */ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord( const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure { ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0}; input.size = sizeof(input); @@ -710,11 +724,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord( UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - CoordEq metaEq; - - GetMetaEquation(&metaEq, 0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, - Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0); + const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, + Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); UINT_32 xb = pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; @@ -724,7 +736,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord( UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); + UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); pOut->addr = address >> 1; pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2); @@ -754,8 +766,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord( */ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure { ADDR_E_RETURNCODE returnCode = ADDR_OK; @@ -787,11 +798,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - CoordEq metaEq; - - GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0); + const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); UINT_32 xb = pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; @@ -801,7 +810,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); + UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); pOut->addr = address >> 1; @@ -830,8 +839,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( */ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure { ADDR_E_RETURNCODE returnCode = ADDR_OK; @@ -862,11 +870,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - CoordEq metaEq; - - GetMetaEquation(&metaEq, 0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0); + const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}); UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, pIn->swizzleMode); @@ -879,7 +885,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; UINT_32 x, y, z, s, m; - metaEq.solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m); + pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m); pOut->slice = m / sliceSizeInBlock; pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y; @@ -903,7 +909,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( */ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) { ADDR_E_RETURNCODE returnCode = ADDR_OK; @@ -942,12 +948,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); - CoordEq metaEq; - - GetMetaEquation(&metaEq, pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, - Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, - compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2); + const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, + Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, + compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}); UINT_32 xb = pIn->x / output.metaBlkWidth; UINT_32 yb = pIn->y / output.metaBlkHeight; @@ -957,7 +961,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - UINT_64 address = metaEq.solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex); + UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex); pOut->addr = address >> 1; @@ -1184,16 +1188,18 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily( m_settings.isArcticIsland = 1; m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision); - if (m_settings.isVega10) + m_settings.isDce12 = 1; + + if (m_settings.isVega10 == 0) { - m_settings.isDce12 = 1; + m_settings.htileAlignFix = 1; + m_settings.applyAliasFix = 1; } m_settings.metaBaseAlignFix = 1; m_settings.depthPipeXorDisable = 1; break; - case FAMILY_RV: m_settings.isArcticIsland = 1; m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision); @@ -1205,7 +1211,10 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily( m_settings.metaBaseAlignFix = 1; - m_settings.depthPipeXorDisable = 1; + if (ASICREV_IS_RAVEN(uChipRevision)) + { + m_settings.depthPipeXorDisable = 1; + } break; default: @@ -1230,6 +1239,7 @@ VOID Gfx9Lib::GetRbEquation( CoordEq* pRbEq, ///< [out] rb equation UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine UINT_32 numSeLog2) ///< [in] number of shader engine + const { // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4; @@ -1250,6 +1260,12 @@ VOID Gfx9Lib::GetRbEquation( (*pRbEq)[0].add(cy); cx++; cy++; + + if (m_settings.applyAliasFix == false) + { + (*pRbEq)[0].add(cy); + } + (*pRbEq)[0].add(cy); start++; } @@ -1583,7 +1599,6 @@ VOID Gfx9Lib::GetPipeEquation( pPipeEq->xorin(xorMask); } } - /** ************************************************************************************************************************ * Gfx9Lib::GetMetaEquation @@ -1591,29 +1606,86 @@ VOID Gfx9Lib::GetPipeEquation( * @brief * Get meta equation for cmask/htile/DCC * @return +* Pointer to a calculated meta equation +************************************************************************************************************************ +*/ +const CoordEq* Gfx9Lib::GetMetaEquation( + const MetaEqParams& metaEqParams) +{ + UINT_32 cachedMetaEqIndex; + + for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++) + { + if (memcmp(&metaEqParams, + &m_cachedMetaEqKey[cachedMetaEqIndex], + static_cast<UINT_32>(sizeof(metaEqParams))) == 0) + { + break; + } + } + + CoordEq* pMetaEq = NULL; + + if (cachedMetaEqIndex < MaxCachedMetaEq) + { + pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex]; + } + else + { + m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams; + + pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++]; + + m_metaEqOverrideIndex %= MaxCachedMetaEq; + + GenMetaEquation(pMetaEq, + metaEqParams.maxMip, + metaEqParams.elementBytesLog2, + metaEqParams.numSamplesLog2, + metaEqParams.metaFlag, + metaEqParams.dataSurfaceType, + metaEqParams.swizzleMode, + metaEqParams.resourceType, + metaEqParams.metaBlkWidthLog2, + metaEqParams.metaBlkHeightLog2, + metaEqParams.metaBlkDepthLog2, + metaEqParams.compBlkWidthLog2, + metaEqParams.compBlkHeightLog2, + metaEqParams.compBlkDepthLog2); + } + + return pMetaEq; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GenMetaEquation +* +* @brief +* Get meta equation for cmask/htile/DCC +* @return * N/A ************************************************************************************************************************ */ -VOID Gfx9Lib::GetMetaEquation( - CoordEq* pMetaEq, ///< [out] meta equation - UINT_32 maxMip, ///< [in] max mip Id - UINT_32 elementBytesLog2, ///< [in] data surface element bytes - UINT_32 numSamplesLog2, ///< [in] data surface sample count - ADDR2_META_FLAGS metaFlag, ///< [in] meta falg - Gfx9DataType dataSurfaceType, ///< [in] data surface type - AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode - AddrResourceType resourceType, ///< [in] data surface resource type - UINT_32 metaBlkWidthLog2, ///< [in] meta block width - UINT_32 metaBlkHeightLog2, ///< [in] meta block height - UINT_32 metaBlkDepthLog2, ///< [in] meta block depth - UINT_32 compBlkWidthLog2, ///< [in] compress block width - UINT_32 compBlkHeightLog2, ///< [in] compress block height - UINT_32 compBlkDepthLog2) ///< [in] compress block depth +VOID Gfx9Lib::GenMetaEquation( + CoordEq* pMetaEq, ///< [out] meta equation + UINT_32 maxMip, ///< [in] max mip Id + UINT_32 elementBytesLog2, ///< [in] data surface element bytes + UINT_32 numSamplesLog2, ///< [in] data surface sample count + ADDR2_META_FLAGS metaFlag, ///< [in] meta falg + Gfx9DataType dataSurfaceType, ///< [in] data surface type + AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode + AddrResourceType resourceType, ///< [in] data surface resource type + UINT_32 metaBlkWidthLog2, ///< [in] meta block width + UINT_32 metaBlkHeightLog2, ///< [in] meta block height + UINT_32 metaBlkDepthLog2, ///< [in] meta block depth + UINT_32 compBlkWidthLog2, ///< [in] compress block width + UINT_32 compBlkHeightLog2, ///< [in] compress block height + UINT_32 compBlkDepthLog2) ///< [in] compress block depth const { - UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode); + UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode); UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2; - //UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); // Get the correct data address and rb equation CoordEq dataEq; @@ -1769,16 +1841,15 @@ VOID Gfx9Lib::GetMetaEquation( } } - UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0; - UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0; - CoordEq origRbEquation; + const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0; + const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0; + const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2; + CoordEq origRbEquation; GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2); CoordEq rbEquation = origRbEquation; - UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2; - for (UINT_32 i = 0; i < numRbTotalLog2; i++) { for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--) @@ -1790,18 +1861,41 @@ VOID Gfx9Lib::GetMetaEquation( } } + if (m_settings.applyAliasFix) + { + co.set('z', -1); + } + // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it for (UINT_32 i = 0; i < numRbTotalLog2; i++) { for (UINT_32 j = 0; j < numPipeTotalLog2; j++) { - if (rbEquation[i] == pipeEquation[j]) + BOOL_32 isRbEquationInPipeEquation = FALSE; + + if (m_settings.applyAliasFix) + { + CoordTerm filteredPipeEq; + filteredPipeEq = pipeEquation[j]; + + filteredPipeEq.Filter('>', co, 0, 'z'); + + isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq); + } + else + { + isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]); + } + + if (isRbEquationInPipeEquation) { rbEquation[i].Clear(); } } } + bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; + // Loop through each bit of the channel, get the smallest coordinate, // and remove it from the metaaddr, and rb_equation for (UINT_32 i = 0; i < numPipeTotalLog2; i++) @@ -1827,6 +1921,7 @@ VOID Gfx9Lib::GetMetaEquation( if (pipeEquation[i][k] != co) { rbEquation[j].add(pipeEquation[i][k]); + rbAppendedWithPipeBits[j] = true; } } } @@ -1838,7 +1933,18 @@ VOID Gfx9Lib::GetMetaEquation( UINT_32 rbBitsLeft = 0; for (UINT_32 i = 0; i < numRbTotalLog2; i++) { - if (rbEquation[i].getsize() > 0) + BOOL_32 isRbEqAppended = FALSE; + + if (m_settings.applyAliasFix) + { + isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0)); + } + else + { + isRbEqAppended = (rbEquation[i].getsize() > 0); + } + + if (isRbEqAppended) { rbBitsLeft++; rbEquation[i].getsmallest(co); @@ -1860,6 +1966,7 @@ VOID Gfx9Lib::GetMetaEquation( if (rbEquation[i][k] != co) { rbEquation[j].add(rbEquation[i][k]); + rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i]; } } } @@ -1905,7 +2012,18 @@ VOID Gfx9Lib::GetMetaEquation( // Put in remaining rb bits for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2) { - if (rbEquation[i].getsize() > 0) + BOOL_32 isRbEqAppended = FALSE; + + if (m_settings.applyAliasFix) + { + isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0)); + } + else + { + isRbEqAppended = (rbEquation[i].getsize() > 0); + } + + if (isRbEqAppended) { origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]); // Mark any rb bit we add in to the rb mask @@ -2717,7 +2835,8 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode( { BOOL_32 support = FALSE; - //const AddrResourceType resourceType = pIn->resourceType; + const AddrResourceType resourceType = pIn->resourceType; + (void)resourceType; const AddrSwizzleMode swizzleMode = pIn->swizzleMode; if (m_settings.isDce12) @@ -3059,6 +3178,16 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB, }; + enum AddrSwSet + { + AddrSwSetZ = 1 << ADDR_SW_Z, + AddrSwSetS = 1 << ADDR_SW_S, + AddrSwSetD = 1 << ADDR_SW_D, + AddrSwSetR = 1 << ADDR_SW_R, + + AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR, + }; + ADDR_E_RETURNCODE returnCode = ADDR_OK; ElemLib* pElemLib = GetElemLib(); @@ -3109,10 +3238,13 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( pOut->resourceType = pIn->resourceType; } - ADDR_ASSERT(bpp >= 8u); - UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u); + if (bpp < 8) + { + ADDR_ASSERT_ALWAYS(); - if (IsTex1d(pOut->resourceType)) + returnCode = ADDR_INVALIDPARAMS; + } + else if (IsTex1d(pOut->resourceType)) { pOut->swizzleMode = ADDR_SW_LINEAR; pOut->validBlockSet.value = AddrBlockSetLinear; @@ -3123,7 +3255,15 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( ADDR2_BLOCK_SET blockSet; blockSet.value = 0; - AddrSwType swType = ADDR_SW_S; + ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet; + addrPreferredSwSet.value = AddrSwSetS; + addrValidSwSet = addrPreferredSwSet; + clientPreferredSwSet = pIn->preferredSwSet; + + if (clientPreferredSwSet.value == 0) + { + clientPreferredSwSet.value = AddrSwSetAll; + } // prt Xor and non-xor will have less height align requirement for stereo surface BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE); @@ -3135,8 +3275,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil) { ADDR_ASSERT(IsTex2d(pOut->resourceType)); - blockSet.value = AddrBlockSetMacro; - swType = ADDR_SW_Z; + blockSet.value = AddrBlockSetMacro; + addrPreferredSwSet.value = AddrSwSetZ; + addrValidSwSet.value = AddrSwSetZ; if (pIn->flags.depth && pIn->flags.texture) { @@ -3153,9 +3294,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( } else if (ElemLib::IsBlockCompressed(pIn->format)) { - // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. Not sure - // under what circumstances "_D" would be appropriate as these formats are not - // displayable. + // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. + // Not sure under what circumstances "_D" would be appropriate as these formats + // are not displayable. blockSet.value = AddrBlockSetMacro; // This isn't to be used as texture and caller doesn't allow macro tiled. @@ -3164,15 +3305,19 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( { blockSet.value |= AddrBlockSetLinear; } - swType = ADDR_SW_D; + + addrPreferredSwSet.value = AddrSwSetD; + addrValidSwSet.value = AddrSwSetS | AddrSwSetD; } else if (ElemLib::IsMacroPixelPacked(pIn->format)) { - // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. Its not - // clear under what circumstances the D or R modes would be appropriate since - // these formats are not displayable. - blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; - swType = ADDR_SW_S; + // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. + // Its notclear under what circumstances the D or R modes would be appropriate + // since these formats are not displayable. + blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; + + addrPreferredSwSet.value = AddrSwSetS; + addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR; } else if (IsTex3d(pOut->resourceType)) { @@ -3181,28 +3326,38 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( if (pIn->flags.prt) { // PRT cannot use SW_D which gives an unexpected block dimension - swType = ADDR_SW_Z; + addrPreferredSwSet.value = AddrSwSetZ; + addrValidSwSet.value = AddrSwSetZ | AddrSwSetS; } else if ((numMipLevels > 1) && (slice >= width) && (slice >= height)) { // When depth (Z) is the maximum dimension then must use one of the SW_*_S // or SW_*_Z modes if mipmapping is desired on a 3D surface - swType = ADDR_SW_Z; + addrPreferredSwSet.value = AddrSwSetZ; + addrValidSwSet.value = AddrSwSetZ | AddrSwSetS; } else if (pIn->flags.color) { - swType = ADDR_SW_D; + addrPreferredSwSet.value = AddrSwSetD; + addrValidSwSet.value = AddrSwSetZ | AddrSwSetS | AddrSwSetD; } else { - swType = ADDR_SW_Z; + addrPreferredSwSet.value = AddrSwSetZ; + addrValidSwSet.value = AddrSwSetZ | AddrSwSetD; + if (bpp != 128) + { + addrValidSwSet.value |= AddrSwSetS; + } } } else { - swType = ((pIn->flags.display == TRUE) || - (pIn->flags.overlay == TRUE) || - (pIn->bpp == 128)) ? ADDR_SW_D : ADDR_SW_S; + addrPreferredSwSet.value = ((pIn->flags.display == TRUE) || + (pIn->flags.overlay == TRUE) || + (pIn->bpp == 128)) ? AddrSwSetD : AddrSwSetS; + + addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR; if (numMipLevels > 1) { @@ -3223,7 +3378,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( if (displayResource) { - swType = pIn->flags.rotated ? ADDR_SW_R : ADDR_SW_D; + addrPreferredSwSet.value = pIn->flags.rotated ? AddrSwSetR : AddrSwSetD; if (pIn->bpp > 64) { @@ -3238,17 +3393,21 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( // DCE12 does not support display surface to be _T swizzle mode prtXor = FALSE; + + addrValidSwSet.value = AddrSwSetD | AddrSwSetR; } else if (m_settings.isDcn1) { // _R is not supported by Dcn1 if (pIn->bpp == 64) { - swType = ADDR_SW_D; + addrPreferredSwSet.value = AddrSwSetD; + addrValidSwSet.value = AddrSwSetD; } else { - swType = ADDR_SW_S; + addrPreferredSwSet.value = AddrSwSetS; + addrValidSwSet.value = AddrSwSetS | AddrSwSetD; } blockSet.micro = FALSE; @@ -3262,279 +3421,325 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( } } - if ((numFrags > 1) && - (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags))) - { - // MSAA surface must have blk_bytes/pipe_interleave >= num_samples - blockSet.macro4KB = FALSE; - } + ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value); - if (pIn->flags.prt) - { - blockSet.value &= AddrBlockSetMacro64KB; - } + pOut->clientPreferredSwSet = clientPreferredSwSet; + + // Clamp client preferred set to valid set + clientPreferredSwSet.value &= addrValidSwSet.value; - // Apply customized forbidden setting - blockSet.value &= ~pIn->forbiddenBlock.value; + pOut->validSwTypeSet = addrValidSwSet; - if (pIn->maxAlign > 0) + if (clientPreferredSwSet.value == 0) { - if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB)) + // Client asks for an invalid swizzle type... + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_INVALIDPARAMS; + } + else + { + if (IsPow2(clientPreferredSwSet.value)) + { + // Only one swizzle type left, use it directly + addrPreferredSwSet.value = clientPreferredSwSet.value; + } + else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0) { - blockSet.macro64KB = FALSE; + // Client wants 2 or more a valid swizzle type but none of them is addrlib preferred + if (clientPreferredSwSet.sw_D) + { + addrPreferredSwSet.value = AddrSwSetD; + } + else if (clientPreferredSwSet.sw_Z) + { + addrPreferredSwSet.value = AddrSwSetZ; + } + else if (clientPreferredSwSet.sw_R) + { + addrPreferredSwSet.value = AddrSwSetR; + } + else + { + ADDR_ASSERT(clientPreferredSwSet.sw_S); + addrPreferredSwSet.value = AddrSwSetS; + } } - if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB)) + if ((numFrags > 1) && + (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags))) { + // MSAA surface must have blk_bytes/pipe_interleave >= num_samples blockSet.macro4KB = FALSE; } - if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B)) + if (pIn->flags.prt) { - blockSet.micro = FALSE; + blockSet.value &= AddrBlockSetMacro64KB; } - } - Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; - Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; - UINT_64 padSize[AddrBlockMaxTiledType] = {0}; + // Apply customized forbidden setting + blockSet.value &= ~pIn->forbiddenBlock.value; - if (blockSet.micro) - { - returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w, - &blkAlign[AddrBlockMicro].h, - &blkAlign[AddrBlockMicro].d, - bpp, - numFrags, - pOut->resourceType, - ADDR_SW_256B); - - if (returnCode == ADDR_OK) + if (pIn->maxAlign > 0) { - if (displayResource) + if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB)) { - blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32); + blockSet.macro64KB = FALSE; } - else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) && - (minSizeAlign <= GetBlockSize(ADDR_SW_256B))) + + if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB)) { - // If one 256B block can contain the surface, don't bother bigger block type blockSet.macro4KB = FALSE; - blockSet.macro64KB = FALSE; - blockSet.var = FALSE; } - padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height, - slice, &paddedDim[AddrBlockMicro]); - } - } - - if ((returnCode == ADDR_OK) && blockSet.macro4KB) - { - returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w, - &blkAlign[AddrBlock4KB].h, - &blkAlign[AddrBlock4KB].d, - bpp, - numFrags, - pOut->resourceType, - ADDR_SW_4KB); - - if (returnCode == ADDR_OK) - { - if (displayResource) + if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B)) { - blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32); + blockSet.micro = FALSE; } - - padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height, - slice, &paddedDim[AddrBlock4KB]); - - ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]); } - } - if ((returnCode == ADDR_OK) && blockSet.macro64KB) - { - returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w, - &blkAlign[AddrBlock64KB].h, - &blkAlign[AddrBlock64KB].d, - bpp, - numFrags, - pOut->resourceType, - ADDR_SW_64KB); + Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; + Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; + UINT_64 padSize[AddrBlockMaxTiledType] = {0}; - if (returnCode == ADDR_OK) + if (blockSet.micro) { - if (displayResource) + returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w, + &blkAlign[AddrBlockMicro].h, + &blkAlign[AddrBlockMicro].d, + bpp, + numFrags, + pOut->resourceType, + ADDR_SW_256B); + + if (returnCode == ADDR_OK) { - blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32); - } - - padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height, - slice, &paddedDim[AddrBlock64KB]); - - ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]); - ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]); - } - } + if (displayResource) + { + blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32); + } + else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) && + (minSizeAlign <= GetBlockSize(ADDR_SW_256B))) + { + // If one 256B block can contain the surface, don't bother bigger block type + blockSet.macro4KB = FALSE; + blockSet.macro64KB = FALSE; + blockSet.var = FALSE; + } - if (returnCode == ADDR_OK) - { - for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) - { - padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement); + padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height, + slice, &paddedDim[AddrBlockMicro]); + } } - // Use minimum block type which meets all conditions above if flag minimizeAlign was set - if (pIn->flags.minimizeAlign) + if ((returnCode == ADDR_OK) && blockSet.macro4KB) { - // If padded size of 64KB block is larger than padded size of 256B block or 4KB - // block, filter out 64KB block from candidate list - if (blockSet.macro64KB && - ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) || - (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB])))) + returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w, + &blkAlign[AddrBlock4KB].h, + &blkAlign[AddrBlock4KB].d, + bpp, + numFrags, + pOut->resourceType, + ADDR_SW_4KB); + + if (returnCode == ADDR_OK) { - blockSet.macro64KB = FALSE; - } + if (displayResource) + { + blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32); + } - // If padded size of 4KB block is larger than padded size of 256B block, - // filter out 4KB block from candidate list - if (blockSet.macro4KB && - blockSet.micro && - (padSize[AddrBlockMicro] < padSize[AddrBlock4KB])) - { - blockSet.macro4KB = FALSE; + padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height, + slice, &paddedDim[AddrBlock4KB]); + + ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]); } } - // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint - else if (pIn->flags.opt4space) - { - UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] : - (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]); - threshold += threshold >> 1; - - if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold)) + if ((returnCode == ADDR_OK) && blockSet.macro64KB) + { + returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w, + &blkAlign[AddrBlock64KB].h, + &blkAlign[AddrBlock64KB].d, + bpp, + numFrags, + pOut->resourceType, + ADDR_SW_64KB); + + if (returnCode == ADDR_OK) { - blockSet.macro64KB = FALSE; - } + if (displayResource) + { + blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32); + } - if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold)) - { - blockSet.macro4KB = FALSE; + padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height, + slice, &paddedDim[AddrBlock64KB]); + + ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]); + ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]); } } - else + + if (returnCode == ADDR_OK) { - if (blockSet.macro64KB && - (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) && - ((blockSet.value & ~AddrBlockSetMacro64KB) != 0)) + UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u); + + for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) { - // If 64KB block waste more than half memory on padding, filter it out from - // candidate list when it is not the only choice left - blockSet.macro64KB = FALSE; + padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement); } - } - if (blockSet.value == 0) - { - // Bad things happen, client will not get any useful information from AddrLib. - // Maybe we should fill in some output earlier instead of outputing nothing? - ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_INVALIDPARAMS; - } - else - { - pOut->validBlockSet = blockSet; - pOut->canXor = pOut->canXor && - (blockSet.macro4KB || blockSet.macro64KB || blockSet.var); - - if (blockSet.macro64KB || blockSet.macro4KB) + // Use minimum block type which meets all conditions above if flag minimizeAlign was set + if (pIn->flags.minimizeAlign) { - if (swType == ADDR_SW_Z) - { - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z; - } - else if (swType == ADDR_SW_S) + // If padded size of 64KB block is larger than padded size of 256B block or 4KB + // block, filter out 64KB block from candidate list + if (blockSet.macro64KB && + ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) || + (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB])))) { - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S; + blockSet.macro64KB = FALSE; } - else if (swType == ADDR_SW_D) + + // If padded size of 4KB block is larger than padded size of 256B block, + // filter out 4KB block from candidate list + if (blockSet.macro4KB && + blockSet.micro && + (padSize[AddrBlockMicro] < padSize[AddrBlock4KB])) { - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D; + blockSet.macro4KB = FALSE; } - else + } + // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint + else if (pIn->flags.opt4space) + { + UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] : + (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]); + + threshold += threshold >> 1; + + if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold)) { - ADDR_ASSERT(swType == ADDR_SW_R); - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R; + blockSet.macro64KB = FALSE; } - if (prtXor && blockSet.macro64KB) + if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold)) { - // Client wants PRTXOR, give back _T swizzle mode if 64KB is available - const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z; - pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap); + blockSet.macro4KB = FALSE; } - else if (pOut->canXor) + } + else + { + if (blockSet.macro64KB && + (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) && + ((blockSet.value & ~AddrBlockSetMacro64KB) != 0)) { - // Client wants XOR and this is allowed, return XOR version swizzle mode - const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z; - pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap); + // If 64KB block waste more than half memory on padding, filter it out from + // candidate list when it is not the only choice left + blockSet.macro64KB = FALSE; } } - else if (blockSet.micro) + + if (blockSet.value == 0) + { + // Bad things happen, client will not get any useful information from AddrLib. + // Maybe we should fill in some output earlier instead of outputing nothing? + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_INVALIDPARAMS; + } + else { - if (swType == ADDR_SW_S) + pOut->validBlockSet = blockSet; + pOut->canXor = pOut->canXor && + (blockSet.macro4KB || blockSet.macro64KB || blockSet.var); + + if (blockSet.macro64KB || blockSet.macro4KB) + { + if (addrPreferredSwSet.value == AddrSwSetZ) + { + pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z; + } + else if (addrPreferredSwSet.value == AddrSwSetS) + { + pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S; + } + else if (addrPreferredSwSet.value == AddrSwSetD) + { + pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D; + } + else + { + ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR); + pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R; + } + + if (prtXor && blockSet.macro64KB) + { + // Client wants PRTXOR, give back _T swizzle mode if 64KB is available + const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z; + pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap); + } + else if (pOut->canXor) + { + // Client wants XOR and this is allowed, return XOR version swizzle mode + const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z; + pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap); + } + } + else if (blockSet.micro) { - pOut->swizzleMode = ADDR_SW_256B_S; + if (addrPreferredSwSet.value == AddrSwSetS) + { + pOut->swizzleMode = ADDR_SW_256B_S; + } + else if (addrPreferredSwSet.value == AddrSwSetD) + { + pOut->swizzleMode = ADDR_SW_256B_D; + } + else + { + ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR); + pOut->swizzleMode = ADDR_SW_256B_R; + } } - else if (swType == ADDR_SW_D) + else if (blockSet.linear) { - pOut->swizzleMode = ADDR_SW_256B_D; + // Fall into this branch doesn't mean linear is suitable, only no other choices! + pOut->swizzleMode = ADDR_SW_LINEAR; } else { - ADDR_ASSERT(swType == ADDR_SW_R); - pOut->swizzleMode = ADDR_SW_256B_R; - } - } - else if (blockSet.linear) - { - // Fall into this branch doesn't mean linear is suitable, only no other choices! - pOut->swizzleMode = ADDR_SW_LINEAR; - } - else - { - ADDR_ASSERT(blockSet.var); + ADDR_ASSERT(blockSet.var); - // Designer consider VAR swizzle mode is usless for most cases - ADDR_UNHANDLED_CASE(); + // Designer consider VAR swizzle mode is usless for most cases + ADDR_UNHANDLED_CASE(); - returnCode = ADDR_NOTSUPPORTED; - } + returnCode = ADDR_NOTSUPPORTED; + } #if DEBUG - // Post sanity check, at least AddrLib should accept the output generated by its own - if (pOut->swizzleMode != ADDR_SW_LINEAR) - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - localIn.flags = pIn->flags; - localIn.swizzleMode = pOut->swizzleMode; - localIn.resourceType = pOut->resourceType; - localIn.format = pIn->format; - localIn.bpp = bpp; - localIn.width = width; - localIn.height = height; - localIn.numSlices = slice; - localIn.numMipLevels = numMipLevels; - localIn.numSamples = numSamples; - localIn.numFrags = numFrags; - - HwlComputeSurfaceInfoSanityCheck(&localIn); - - // TODO : check all valid block type available in validBlockSet? - } + // Post sanity check, at least AddrLib should accept the output generated by its own + if (pOut->swizzleMode != ADDR_SW_LINEAR) + { + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; + localIn.flags = pIn->flags; + localIn.swizzleMode = pOut->swizzleMode; + localIn.resourceType = pOut->resourceType; + localIn.format = pIn->format; + localIn.bpp = bpp; + localIn.width = width; + localIn.height = height; + localIn.numSlices = slice; + localIn.numMipLevels = numMipLevels; + localIn.numSamples = numSamples; + localIn.numFrags = numFrags; + + HwlComputeSurfaceInfoSanityCheck(&localIn); + + } #endif + } } } } @@ -3709,53 +3914,48 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices); - pOut->epitchIsHeight = FALSE; - pOut->mipChainInTail = FALSE; + pOut->epitchIsHeight = FALSE; + pOut->mipChainInTail = FALSE; + pOut->firstMipIdInTail = pIn->numMipLevels; - pOut->mipChainPitch = pOut->pitch; - pOut->mipChainHeight = pOut->height; - pOut->mipChainSlice = pOut->numSlices; + pOut->mipChainPitch = pOut->pitch; + pOut->mipChainHeight = pOut->height; + pOut->mipChainSlice = pOut->numSlices; if (pIn->numMipLevels > 1) { - UINT_32 numMipLevel; - ADDR2_MIP_INFO *pMipInfo; - ADDR2_MIP_INFO mipInfo[4]; - - if (pOut->pMipInfo != NULL) + pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType, + pIn->swizzleMode, + pIn->bpp, + pIn->width, + pIn->height, + pIn->numSlices, + pOut->blockWidth, + pOut->blockHeight, + pOut->blockSlices, + pIn->numMipLevels, + pOut->pMipInfo); + + const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1); + + if (endingMipId == 0) { - pMipInfo = pOut->pMipInfo; - numMipLevel = pIn->numMipLevels; - } - else - { - pMipInfo = mipInfo; - numMipLevel = Min(pIn->numMipLevels, 4u); - } + const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType, + pIn->swizzleMode, + pOut->blockWidth, + pOut->blockHeight, + pOut->blockSlices); - UINT_32 endingMip = GetMipChainInfo(pIn->resourceType, - pIn->swizzleMode, - pIn->bpp, - pIn->width, - pIn->height, - pIn->numSlices, - pOut->blockWidth, - pOut->blockHeight, - pOut->blockSlices, - numMipLevel, - pMipInfo); - - if (endingMip == 0) - { pOut->epitchIsHeight = TRUE; - pOut->pitch = pMipInfo[0].pitch; - pOut->height = pMipInfo[0].height; - pOut->numSlices = pMipInfo[0].depth; + pOut->pitch = tailMaxDim.w; + pOut->height = tailMaxDim.h; + pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ? + tailMaxDim.d : pIn->numSlices; pOut->mipChainInTail = TRUE; } else { - UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth; + UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth; UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight; AddrMajorMode majorMode = GetMajorMode(pIn->resourceType, @@ -3767,7 +3967,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( { UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk); - if ((mip1WidthInBlk == 1) && (endingMip > 2)) + if ((mip1WidthInBlk == 1) && (endingMipId > 2)) { mip1WidthInBlk++; } @@ -3780,7 +3980,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( { UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk); - if ((mip1HeightInBlk == 1) && (endingMip > 2)) + if ((mip1HeightInBlk == 1) && (endingMipId > 2)) { mip1HeightInBlk++; } @@ -3821,22 +4021,22 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( UINT_64 macroBlockOffset = blockIndex << GetBlockSizeLog2(pIn->swizzleMode); - pMipInfo[i].macroBlockOffset = macroBlockOffset; - pMipInfo[i].mipTailOffset = mipTailOffsetInBytes; + pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset; + pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes; } } } else if (pOut->pMipInfo != NULL) { - pOut->pMipInfo[0].pitch = pOut->pitch; + pOut->pMipInfo[0].pitch = pOut->pitch; pOut->pMipInfo[0].height = pOut->height; - pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1; + pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1; pOut->pMipInfo[0].offset = 0; } pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight * (pIn->bpp >> 3) * pIn->numFrags; - pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice; + pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice; pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode); if (pIn->flags.prt) @@ -3851,6 +4051,95 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( /** ************************************************************************************************************************ +* Gfx9Lib::HwlComputeSurfaceInfoLinear +* +* @brief +* Internal function to calculate alignment for linear surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + UINT_32 pitch = 0; + UINT_32 actualHeight = 0; + UINT_32 elementBytes = pIn->bpp >> 3; + const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256; + + if (IsTex1d(pIn->resourceType)) + { + if (pIn->height > 1) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + const UINT_32 pitchAlignInElement = alignment / elementBytes; + + pitch = PowTwoAlign(pIn->width, pitchAlignInElement); + actualHeight = pIn->numMipLevels; + + if (pIn->flags.prt == FALSE) + { + returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, + &pitch, &actualHeight); + } + + if (returnCode == ADDR_OK) + { + if (pOut->pMipInfo != NULL) + { + for (UINT_32 i = 0; i < pIn->numMipLevels; i++) + { + pOut->pMipInfo[i].offset = pitch * elementBytes * i; + pOut->pMipInfo[i].pitch = pitch; + pOut->pMipInfo[i].height = 1; + pOut->pMipInfo[i].depth = 1; + } + } + } + } + } + else + { + returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo); + } + + if ((pitch == 0) || (actualHeight == 0)) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + pOut->pitch = pitch; + pOut->height = pIn->height; + pOut->numSlices = pIn->numSlices; + pOut->mipChainPitch = pitch; + pOut->mipChainHeight = actualHeight; + pOut->mipChainSlice = pOut->numSlices; + pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE; + pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes; + pOut->surfSize = pOut->sliceSize * pOut->numSlices; + pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment; + pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes); + pOut->blockHeight = 1; + pOut->blockSlices = 1; + } + + // Post calculation validate + ADDR_ASSERT(pOut->sliceSize > 0); + + return returnCode; +} + +/** +************************************************************************************************************************ * Gfx9Lib::GetMipChainInfo * * @brief @@ -3876,16 +4165,15 @@ UINT_32 Gfx9Lib::GetMipChainInfo( const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); - UINT_32 mipPitch = mip0Width; - UINT_32 mipHeight = mip0Height; - UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1; - UINT_32 offset = 0; - UINT_32 endingMip = numMipLevel - 1; - BOOL_32 inTail = FALSE; - BOOL_32 finalDim = FALSE; - - BOOL_32 is3dThick = IsThick(resourceType, swizzleMode); - BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE); + UINT_32 mipPitch = mip0Width; + UINT_32 mipHeight = mip0Height; + UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1; + UINT_32 offset = 0; + UINT_32 firstMipIdInTail = numMipLevel; + BOOL_32 inTail = FALSE; + BOOL_32 finalDim = FALSE; + BOOL_32 is3dThick = IsThick(resourceType, swizzleMode); + BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE); for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++) { @@ -3931,10 +4219,9 @@ UINT_32 Gfx9Lib::GetMipChainInfo( if (inTail) { - endingMip = mipId; - - mipPitch = tailMaxDim.w; - mipHeight = tailMaxDim.h; + firstMipIdInTail = mipId; + mipPitch = tailMaxDim.w; + mipHeight = tailMaxDim.h; if (is3dThick) { @@ -3953,10 +4240,14 @@ UINT_32 Gfx9Lib::GetMipChainInfo( } } - pMipInfo[mipId].pitch = mipPitch; - pMipInfo[mipId].height = mipHeight; - pMipInfo[mipId].depth = mipDepth; - pMipInfo[mipId].offset = offset; + if (pMipInfo != NULL) + { + pMipInfo[mipId].pitch = mipPitch; + pMipInfo[mipId].height = mipHeight; + pMipInfo[mipId].depth = mipDepth; + pMipInfo[mipId].offset = offset; + } + offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3)); if (finalDim) @@ -3978,7 +4269,7 @@ UINT_32 Gfx9Lib::GetMipChainInfo( } } - return endingMip; + return firstMipIdInTail; } /** @@ -3999,7 +4290,7 @@ VOID Gfx9Lib::GetMetaMiptailInfo( Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth ) const { - BOOL_32 isThick = (pMetaBlkDim->d > 1); + BOOL_32 isThick = (pMetaBlkDim->d > 1); UINT_32 mipWidth = pMetaBlkDim->w; UINT_32 mipHeight = pMetaBlkDim->h >> 1; UINT_32 mipDepth = pMetaBlkDim->d; @@ -4557,5 +4848,72 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( return returnCode; } +/** +************************************************************************************************************************ +* Gfx9Lib::ComputeSurfaceInfoLinear +* +* @brief +* Internal function to calculate padding for linear swizzle 2D/3D surface +* +* @return +* N/A +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture + UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element + UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW + ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + UINT_32 elementBytes = pIn->bpp >> 3; + UINT_32 pitchAlignInElement = 0; + + if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) + { + ADDR_ASSERT(pIn->numMipLevels <= 1); + ADDR_ASSERT(pIn->numSlices <= 1); + pitchAlignInElement = 1; + } + else + { + pitchAlignInElement = (256 / elementBytes); + } + + UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement); + UINT_32 slice0PaddedHeight = pIn->height; + + returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, + &mipChainWidth, &slice0PaddedHeight); + + if (returnCode == ADDR_OK) + { + UINT_32 mipChainHeight = 0; + UINT_32 mipHeight = pIn->height; + + for (UINT_32 i = 0; i < pIn->numMipLevels; i++) + { + if (pMipInfo != NULL) + { + pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes; + pMipInfo[i].pitch = mipChainWidth; + pMipInfo[i].height = mipHeight; + pMipInfo[i].depth = 1; + } + + mipChainHeight += mipHeight; + mipHeight = RoundHalf(mipHeight); + mipHeight = Max(mipHeight, 1u); + } + + *pMipmap0PaddedWidth = mipChainWidth; + *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight; + } + + return returnCode; +} + } // V2 } // Addr diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.h b/src/amd/addrlib/gfx9/gfx9addrlib.h index 418ccac5142..1f233a4ff91 100644 --- a/src/amd/addrlib/gfx9/gfx9addrlib.h +++ b/src/amd/addrlib/gfx9/gfx9addrlib.h @@ -65,7 +65,9 @@ struct Gfx9ChipSettings // Misc configuration bits UINT_32 metaBaseAlignFix : 1; UINT_32 depthPipeXorDisable : 1; - UINT_32 reserved2 : 30; + UINT_32 htileAlignFix : 1; + UINT_32 applyAliasFix : 1; + UINT_32 reserved2 : 28; }; }; @@ -83,6 +85,28 @@ enum Gfx9DataType /** ************************************************************************************************************************ +* @brief GFX9 meta equation parameters +************************************************************************************************************************ +*/ +struct MetaEqParams +{ + UINT_32 maxMip; + UINT_32 elementBytesLog2; + UINT_32 numSamplesLog2; + ADDR2_META_FLAGS metaFlag; + Gfx9DataType dataSurfaceType; + AddrSwizzleMode swizzleMode; + AddrResourceType resourceType; + UINT_32 metaBlkWidthLog2; + UINT_32 metaBlkHeightLog2; + UINT_32 metaBlkDepthLog2; + UINT_32 compBlkWidthLog2; + UINT_32 compBlkHeightLog2; + UINT_32 compBlkDepthLog2; +}; + +/** +************************************************************************************************************************ * @brief This class is the GFX9 specific address library * function set. ************************************************************************************************************************ @@ -139,31 +163,31 @@ protected: virtual ADDR_E_RETURNCODE HwlComputeHtileInfo( const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo( const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; virtual ADDR_E_RETURNCODE HwlComputeDccInfo( const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const; + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) const; + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); virtual UINT_32 HwlGetEquationIndex( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, @@ -206,17 +230,7 @@ protected: if (IsXor(swizzleMode)) { - if (m_settings.isVega10 || m_settings.isRaven) - { - baseAlign = GetBlockSize(swizzleMode); - } - else - { - UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); - UINT_32 pipeBits = GetPipeXorBits(blockSizeLog2); - UINT_32 bankBits = GetBankXorBits(blockSizeLog2); - baseAlign = 1 << (Min(blockSizeLog2, m_pipeInterleaveLog2 + pipeBits+ bankBits)); - } + baseAlign = GetBlockSize(swizzleMode); } else { @@ -249,6 +263,10 @@ protected: const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; @@ -352,6 +370,10 @@ protected: return compressBlkDim; } + + static const UINT_32 MaxSeLog2 = 3; + static const UINT_32 MaxRbPerSeLog2 = 2; + static const Dim3d Block256_3dS[MaxNumOfBpp]; static const Dim3d Block256_3dZ[MaxNumOfBpp]; @@ -375,6 +397,8 @@ protected: // Equation lookup table according to bpp and tile index UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2]; + static const UINT_32 MaxCachedMetaEq = 2; + private: virtual ADDR_E_RETURNCODE HwlGetMaxAlignments( ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const; @@ -382,7 +406,7 @@ private: virtual BOOL_32 HwlInitGlobalParams( const ADDR_CREATE_INPUT* pCreateIn); - static VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2); + VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const; VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType, AddrSwizzleMode swizzleMode, AddrResourceType resourceType, @@ -393,7 +417,7 @@ private: UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType, AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const; - VOID GetMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip, + VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip, UINT_32 elementBytesLog2, UINT_32 numSamplesLog2, ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType, AddrSwizzleMode swizzleMode, AddrResourceType resourceType, @@ -401,6 +425,8 @@ private: UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2, UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const; + const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams); + virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim, @@ -408,7 +434,17 @@ private: UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth, UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const; + ADDR_E_RETURNCODE ComputeSurfaceLinearPadding( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + UINT_32* pMipmap0PaddedWidth, + UINT_32* pSlice0PaddedHeight, + ADDR2_MIP_INFO* pMipInfo = NULL) const; + Gfx9ChipSettings m_settings; + + CoordEq m_cachedMetaEq[MaxCachedMetaEq]; + MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq]; + UINT_32 m_metaEqOverrideIndex; }; } // V2 diff --git a/src/amd/addrlib/gfx9/rbmap.cpp b/src/amd/addrlib/gfx9/rbmap.cpp deleted file mode 100644 index 789140d7c0e..00000000000 --- a/src/amd/addrlib/gfx9/rbmap.cpp +++ /dev/null @@ -1,1388 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -// This class generates rb id map based rb id equations - -//#define DPI_DEBUG 1 -// Unlock more verbose debug messages (V* borrows from dj -v * to indicate most verbosity) -//#define DPI_DEBUG_V4 1 -//#define DPI_DEBUG_V5 1 -//#define DPI_DEBUG_PIPE_CASES 1 -// "----+----|----+----|----+----|----+----|" -#include "addrcommon.h" -#include "rbmap.h" - -RB_MAP::RB_MAP(void) -{ - Initialize(); -} - -VOID RB_MAP::Get_Comp_Block_Screen_Space( CoordEq& addr, int bytes_log2, int* w, int* h, int* d) -{ - int n, i; - if( w ) *w = 0; - if( h ) *h = 0; - if( d ) *d = 0; - for( n=0; n<bytes_log2; n++ ) { // go up to the bytes_log2 bit - for( i=0; (unsigned)i<addr[n].getsize(); i++ ) { - char dim = addr[n][i].getdim(); - int ord = addr[n][i].getord(); - if( w && dim == 'x' && ord >= *w ) *w = ord+1; - if( h && dim == 'y' && ord >= *h ) *h = ord+1; - if( d && dim == 'z' && ord >= *d ) *d = ord+1; - } - } -} - -void -RB_MAP::Get_Meta_Block_Screen_Space( int num_comp_blocks_log2, bool is_thick, bool y_biased, - int comp_block_width_log2, int comp_block_height_log2, int comp_block_depth_log2, - - // Outputs - int& meta_block_width_log2, int& meta_block_height_log2, int& meta_block_depth_log2 ) -{ - meta_block_width_log2 = comp_block_width_log2; - meta_block_height_log2 = comp_block_height_log2; - meta_block_depth_log2 = comp_block_depth_log2; - int n; - - for( n=0; n<num_comp_blocks_log2; n++ ) { - if( (meta_block_height_log2 < meta_block_width_log2) || - (y_biased && (meta_block_height_log2 == meta_block_width_log2)) ) { - if ( !is_thick || (meta_block_height_log2 <= meta_block_depth_log2) ) - meta_block_height_log2++; - else - meta_block_depth_log2++; - } - else { - if ( !is_thick || (meta_block_width_log2 <= meta_block_depth_log2) ) - meta_block_width_log2++; - else - meta_block_depth_log2++; - } - } -} - -void -RB_MAP::cap_pipe( int xmode, bool is_thick, int& num_ses_log2, int bpp_log2, int num_samples_log2, int pipe_interleave_log2, int& block_size_log2, int& num_pipes_log2 ) -{ - // pipes+SEs can't exceed 32 for now - if( num_pipes_log2+num_ses_log2 > 5 ) { - num_pipes_log2 = 5-num_ses_log2; - } - - // Since we are not supporting SE affinity anymore, just add nu_ses to num_pipes, and set num_ses to 0 - num_pipes_log2 += num_ses_log2; - num_ses_log2 = 0; - - // If block size is set to variable (0), compute the size - if( block_size_log2 == 0 ) { - // - //TODO Temporary disable till RTL can drive Var signals properly - } - - if( xmode != NONE ) { - int max_pipes_log2 = block_size_log2 - pipe_interleave_log2; - if( is_thick ) { - // For 3d, treat the num_pipes as the sum of num_pipes and gpus - num_pipes_log2 = num_pipes_log2 + num_ses_log2; - num_ses_log2 = 0; - } else { - int block_space_used = num_pipes_log2+pipe_interleave_log2; - if( block_space_used < 10+bpp_log2 ) block_space_used = 10+bpp_log2; - // if the num gpus exceeds however many bits we have left between block size and block_space_used+num_samples - // then set num_ses_log2 to 0 - if( num_ses_log2 > block_size_log2 - block_space_used - num_samples_log2) { - num_pipes_log2 = num_pipes_log2 + num_ses_log2; - num_ses_log2 = 0; - } - } - if( num_pipes_log2 > max_pipes_log2 ) { - // If it exceeds the space we have left, cap it to that - num_pipes_log2 = max_pipes_log2; - } - } else { - num_pipes_log2 = num_pipes_log2 + num_ses_log2; - num_ses_log2 = 0; - } -} - -void RB_MAP::Get_Data_Offset_Equation( CoordEq& data_eq, int data_type, int bpp_log2, int num_samples_log2, int block_size_log2 ) -{ - bool is_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR ); - bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z ); - bool is_color = ( data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED ); - bool is_s = ( data_type == DATA_COLOR3D_S ); - Coordinate cx( 'x', 0 ); - Coordinate cy( 'y', 0 ); - Coordinate cz( 'z', 0 ); - Coordinate cs( 's', 0 ); - // Clear the equation - data_eq.resize(0); - data_eq.resize(27); - if( block_size_log2 == 0 ) block_size_log2 = 16; - - if( is_linear ) { - Coordinate cm( 'm', 0 ); - int i; - data_eq.resize(49); - for( i=0; i<49; i++ ) { - data_eq[i].add(cm); - cm++; - } - } else if( is_thick ) { - // Color 3d (_S and _Z modes; _D is same as color 2d) - int i; - if( is_s ) { - // Standard 3d swizzle - // Fill in bottom x bits - for( i=bpp_log2; i<4; i++ ) { - data_eq[i].add(cx); - cx++; - } - // Fill in 2 bits of y and then z - for( i=4; i<6; i++ ) { - data_eq[i].add(cy); - cy++; - } - for( i=6; i<8; i++ ) { - data_eq[i].add(cz); - cz++; - } - if (bpp_log2 < 2) { - // fill in z & y bit - data_eq[8].add(cz); - data_eq[9].add(cy); - cz++; - cy++; - } else if( bpp_log2 == 2 ) { - // fill in y and x bit - data_eq[8].add(cy); - data_eq[9].add(cx); - cy++; - cx++; - } else { - // fill in 2 x bits - data_eq[8].add(cx); - cx++; - data_eq[9].add(cx); - cx++; - } - } else { - // Z 3d swizzle - int m2d_end = (bpp_log2==0) ? 3 : ((bpp_log2 < 4) ? 4 : 5); - int num_zs = (bpp_log2==0 || bpp_log2==4) ? 2 : ((bpp_log2==1) ? 3 : 1); - data_eq.mort2d( cx, cy, bpp_log2, m2d_end ); - for( i=m2d_end+1; i<=m2d_end+num_zs; i++ ) { - data_eq[i].add(cz); - cz++; - } - if( bpp_log2 == 0 || bpp_log2 == 3 ) { - // add an x and z - data_eq[6].add(cx); - data_eq[7].add(cz); - cx++; - cz++; - } else if( bpp_log2 == 2 ) { - // add a y and z - data_eq[6].add(cy); - data_eq[7].add(cz); - cy++; - cz++; - } - // add y and x - data_eq[8].add(cy); - data_eq[9].add(cx); - cy++; - cx++; - } - // Fill in bit 10 and up - data_eq.mort3d( cz, cy, cx, 10 ); - } else if( is_color ) { - // Color 2D - int micro_y_bits = (8-bpp_log2) / 2; - int tile_split_start = block_size_log2 - num_samples_log2; - int i; - // Fill in bottom x bits - for( i=bpp_log2;i<4; i++ ) { - data_eq[i].add(cx); - cx++; - } - // Fill in bottom y bits - for( i=4; i<4+micro_y_bits; i++ ) { - data_eq[i].add(cy); - cy++; - } - // Fill in last of the micro_x bits - for( i=4+micro_y_bits; i<8; i++ ) { - data_eq[i].add(cx); - cx++; - } - // Fill in x/y bits below sample split - data_eq.mort2d( cy, cx, 8, tile_split_start-1 ); - // Fill in sample bits - for( i=0; i<num_samples_log2; i++ ) { - cs.set( 's', i ); - data_eq[tile_split_start+i].add(cs); - } - // Fill in x/y bits above sample split - if( (num_samples_log2 & 1) ^ (block_size_log2 & 1) ) data_eq.mort2d( cx, cy, block_size_log2 ); - else data_eq.mort2d( cy, cx, block_size_log2 ); - } else { - // Z, stencil or fmask - // First, figure out where each section of bits starts - int sample_start = bpp_log2; - int pixel_start = bpp_log2 + num_samples_log2; - int y_maj_start = 6 + num_samples_log2; - - // Put in sample bits - int s; - for( s=0; s<num_samples_log2; s++ ) { - cs.set( 's', s ); - data_eq[sample_start+s].add(cs); - } - // Put in the x-major order pixel bits - data_eq.mort2d( cx, cy, pixel_start, y_maj_start-1 ); - // Put in the y-major order pixel bits - data_eq.mort2d( cy, cx, y_maj_start ); - } -} - -void RB_MAP::Get_RB_Equation( CoordEq& rb_equation, int num_ses_log2, int num_rbs_log2 ) -{ - // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32 - int rb_region = (num_rbs_log2 == 0) ? 5 : 4; - Coordinate cx( 'x', rb_region ); - Coordinate cy( 'y', rb_region ); - int i, start = 0, num_total_rbs_log2 = num_ses_log2 + num_rbs_log2; - // Clear the rb equation - rb_equation.resize(0); - rb_equation.resize(num_total_rbs_log2); - if( num_ses_log2 > 0 && num_rbs_log2 == 1 ) { - // Special case when more than 1 SE, and only 1 RB per SE - rb_equation[0].add(cx); - rb_equation[0].add(cy); - cx++; - cy++; - rb_equation[0].add(cy); - start++; - } - for( i=0; i<2*(num_total_rbs_log2-start); i++ ) { - int index = start + (((start+i)>=num_total_rbs_log2) ? 2*(num_total_rbs_log2-start)-i-1 : i); - Coordinate& c = ((i % 2) == 1) ? cx : cy; - rb_equation[index].add(c); - c++; - } -} - -//void getcheq( CoordEq& pipe_equation, CoordEq& addr, int pipe_interleave_log2, int num_pipes_log2, -void -RB_MAP::Get_Pipe_Equation( CoordEq& pipe_equation, CoordEq& addr, - int pipe_interleave_log2, - int num_pipes_log2, - - int block_size_log2, - int num_samples_log2, - - int xmode, int data_type - ) -{ - int pipe; - CoordEq addr_f, xormask, xormask2; - Coordinate tile_min( 'x', 3 ); - - bool is_color = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR2D_LINEAR || data_type == DATA_COLOR3D_D_NOT_USED ); - bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z ); - - // For color, filter out sample bits only - // otherwise filter out everything under an 8x8 tile - if( is_color ) - tile_min.set( 'x', 0 ); - - addr.copy( addr_f ); - - // Z/stencil is no longer tile split - if( is_color ) - addr_f.shift( -num_samples_log2, block_size_log2- num_samples_log2 ); - - int i; - addr_f.copy( pipe_equation, pipe_interleave_log2, num_pipes_log2 ); //@todo kr needs num_ses_log2?? - - - // This section should only apply to z/stencil, maybe fmask - // If the pipe bit is below the comp block size, then keep moving up the address until we find a bit that is above - for( pipe=0; addr_f[pipe_interleave_log2 + pipe][0] < tile_min; pipe++ ) { - } - - // if pipe is 0, then the first pipe bit is above the comp block size, so we don't need to do anything - // Note, this if condition is not necessary, since if we execute the loop when pipe==0, we will get the same pipe equation - if ( pipe != 0 ) { - int j = pipe; - - - for( i=0; i<num_pipes_log2; i++ ) { - // Copy the jth bit above pipe interleave to the current pipe equation bit - addr_f[pipe_interleave_log2 + j].copyto(pipe_equation[i]); - j++; - - - } - - - } - - if( xmode == PRT ) { - // Clear out bits above the block size if prt's are enabled - addr_f.resize(block_size_log2); - addr_f.resize(48); - } - - if( xmode != NONE ) { - if( is_thick ) { - addr_f.copy( xormask2, pipe_interleave_log2+num_pipes_log2, 2*num_pipes_log2 ); - - xormask.resize( num_pipes_log2 ); - for( pipe=0; pipe<num_pipes_log2; pipe++ ) { - xormask[pipe].add( xormask2[2*pipe] ); - xormask[pipe].add( xormask2[2*pipe+1] ); - } - } else { - Coordinate co; - // Xor in the bits above the pipe+gpu bits - addr_f.copy( xormask, pipe_interleave_log2 + pipe + num_pipes_log2, num_pipes_log2 ); - if( num_samples_log2 == 0 && (xmode != PRT) ) { - // if 1xaa and not prt, then xor in the z bits - xormask2.resize(0); - xormask2.resize(num_pipes_log2); - for( pipe=0; pipe<num_pipes_log2; pipe++ ) { - co.set( 'z', num_pipes_log2-1 - pipe ); - xormask2[pipe].add( co ); - } - - pipe_equation.xorin( xormask2 ); - } - } - - xormask.reverse(); - pipe_equation.xorin( xormask ); - - } -} - -void RB_MAP::get_meta_miptail_coord( int& x, int& y, int& z, int mip_in_tail, int blk_width_log2, int blk_height_log2, int blk_depth_log2 ) -{ - bool is_thick = (blk_depth_log2>0); - int m; - int mip_width = 1 << blk_width_log2; - int mip_height = 1 << (blk_height_log2-1); - int mip_depth = 1 << blk_depth_log2; - - // Find the minimal increment, based on the block size and 2d/3d - int min_inc; - if(is_thick) { - min_inc = (blk_height_log2 >= 9) ? 128 : ((blk_height_log2 == 8) ? 64 : 32); - } else if(blk_height_log2>=10) { - min_inc = 256; - } else if(blk_height_log2==9) { - min_inc = 128; - } else { - min_inc = 64; - } - - for( m=0; m<mip_in_tail; m++ ) { - if( mip_width <= 32 ) { - // special case when below 32x32 mipmap - switch(mip_in_tail-m) { - case 0: break; // 32x32 - case 1: x+=32; break; // 16x16 - case 2: y+=32; break; // 8x8 - case 3: y+=32; x+=16; break;// 4x4 - case 4: y+=32; x+=32; break;// 2x2 - case 5: y+=32; x+=48; break;// 1x1 - // The following are for BC/ASTC formats - case 6: y+=48; break; // 1/2 x 1/2 - case 7: y+=48; x+=16; break;// 1/4 x 1/4 - case 8: y+=48; x+=32; break;// 1/8 x 1/8 - default:y+=48; x+=48; break;// 1/16 x 1/16 - } - m = mip_in_tail; // break the loop - } else { - if( mip_width <= min_inc ) { - // if we're below the minimal increment... - if( is_thick ) { - // For 3d, just go in z direction - z += mip_depth; - } else { - // For 2d, first go across, then down - if( mip_width * 2 == min_inc ) { - // if we're 2 mips below, that's when we go back in x, and down in y - x -= min_inc; - y += min_inc; - } else { - // otherwise, just go across in x - x += min_inc; - } - } - } else { - // On even mip, go down, otherwise, go across - if( m&1 ) { - x += mip_width; - } else { - y += mip_height; - } - } - // Divide the width by 2 - mip_width = mip_width / 2; - // After the first mip in tail, the mip is always a square - mip_height = mip_width; - // ...or for 3d, a cube - if(is_thick) mip_depth = mip_width; - } - } -} - -void RB_MAP::get_mip_coord( int& x, int& y, int& z, int mip, - int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2, - int data_blk_width_log2, int data_blk_height_log2, - int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip, - int data_type, int bpp_log2, bool meta_linear ) -{ - if( meta_linear ) { - get_mip_coord_linear( x, y, z, mip, data_blk_width_log2, data_blk_height_log2, - surf_width, surf_height, surf_depth, epitch, max_mip, data_type, bpp_log2 ); - } else { - get_mip_coord_nonlinear( x, y, z, mip, meta_blk_width_log2, meta_blk_height_log2, meta_blk_depth_log2, - surf_width, surf_height, surf_depth, epitch, max_mip, data_type ); - } -} - -void RB_MAP::get_mip_coord_linear( int& x, int& y, int& z, - int mip, - int data_blk_width_log2, int data_blk_height_log2, - int& surf_width, int& surf_height, int& surf_depth, int epitch, - int max_mip, int data_type, int bpp_log2 - ) -{ - bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR ); - - if( data_linear ) { - // linear width is padded out to 256 Bytes - int width_padding = 8 - bpp_log2; - int width_pad_mask = ~(0xffffffff << width_padding); - int padded_surf_width = surf_width; - int padded_surf_height = (data_type == DATA_COLOR1D) ? 1 : surf_height; - - if( max_mip > 0 ) { - int mip_width = padded_surf_width; - int mip_height = padded_surf_height; - int padded_mip_height = 0; - int mip_base = 0; - int m = 0; - while( (mip_width >= 1 || mip_height >= 1) && m <= max_mip ) { - if( mip == m ) mip_base = padded_mip_height; - padded_mip_height += mip_height; - m++; - mip_width = (mip_width / 2) + (mip_width & 1); - mip_height = (mip_height / 2) + (mip_height & 1); - } - if( mip >= m ) { - // assert error - mip_base = padded_mip_height - mip_height; - } - padded_surf_height = padded_mip_height; - - if(epitch > 0){ - padded_surf_height = epitch; - } - y += mip_base; - padded_surf_width = ((surf_width >> width_padding) + ((surf_width & width_pad_mask) ? 1 : 0)) << width_padding; - } - else{ - padded_surf_width = ((surf_width >> width_padding) + ((surf_width & width_pad_mask) ? 1 : 0)) << width_padding; - - // Pad up epitch to meta block width - if( (epitch & width_pad_mask) != 0 ) { - epitch = ((epitch >> width_padding) + 1) << width_padding; - } - // Take max of epitch and computed surf width - if( epitch < padded_surf_width ) { - // assert error - } else { - padded_surf_width = epitch; - } - } - - surf_width = padded_surf_width; - surf_height = padded_surf_height; - } - else { - // padding based data block size - int width_pad_mask = ~(0xffffffff << data_blk_width_log2); - int height_pad_mask = ~(0xffffffff << data_blk_height_log2); - - // Pad the data surface dimensions by the block dimensions, and put the result in compressed block dimension units - surf_width = ((surf_width >> data_blk_width_log2) + ((surf_width & width_pad_mask) ? 1 : 0)) << data_blk_width_log2; - surf_height = ((surf_height >> data_blk_height_log2) + ((surf_height & height_pad_mask) ? 1 : 0)) << data_blk_height_log2; - - // Tiled data, linear metadata - if( max_mip > 0 ) { - // we don't allow mipmapping on tiled data, with linear metadata - // assert error - } - - // Pad up epitch to data block width - if( (epitch & width_pad_mask) != 0 ) { - epitch = ((epitch >> data_blk_width_log2) + 1) << data_blk_width_log2; - } - // Take max of epitch and computed surf width - if( epitch < surf_width ) { - // assert error - } else { - surf_width = epitch; - } - } -} - -void RB_MAP::get_mip_coord_nonlinear( int& x, int& y, int& z, - int mip, - int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2, - - // Outputs - int& surf_width, int& surf_height, int& surf_depth, - - int epitch, int max_mip, int data_type - ) -{ - bool is3d = (data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED ); - int order; // 0 = xmajor, 1 = ymajor, 2 = zmajor - - int mip_width = surf_width; - int mip_height = surf_height; - int mip_depth = (is3d) ? surf_depth : 1; - - // Divide surface w/h/d by block size, padding if needed - surf_width = (((surf_width & ((1<<meta_blk_width_log2 )-1)) != 0) ? 1 : 0) + (surf_width >> meta_blk_width_log2); - surf_height = (((surf_height & ((1<<meta_blk_height_log2)-1)) != 0) ? 1 : 0) + (surf_height >> meta_blk_height_log2); - surf_depth = (((surf_depth & ((1<<meta_blk_depth_log2 )-1)) != 0) ? 1 : 0) + (surf_depth >> meta_blk_depth_log2); - epitch = (((epitch & ((1<<meta_blk_width_log2 )-1)) != 0) ? 1 : 0) + (epitch >> meta_blk_width_log2); - - if( max_mip > 0 ) { - // Determine major order - if( is3d && surf_depth > surf_width && surf_depth > surf_height ) { - order = 2; // Z major - } - else if( surf_width >= surf_height ) { - order = 0; // X major - } - else { - order = 1; // Y major - } - - // Check if mip 0 is in the tail - bool in_tail = (mip_width <= (1<<meta_blk_width_log2)) && - (mip_height <= (1<<(meta_blk_height_log2-1))) && - (!is3d || (mip_depth <= (1<<meta_blk_depth_log2))); - // Pad the mip w/h/d, which is just the surf w/h/d times blk dim - mip_width = surf_width << meta_blk_width_log2; - mip_height = surf_height << meta_blk_height_log2; - mip_depth = surf_depth << meta_blk_depth_log2; - - if( !in_tail ) { - // Select the dimension that stores the mip chain, based on major order - // Then pad it out to max(2, ceil(mip_dim/2)) - int& mip_dim = (order == 1) ? surf_width : surf_height; - // in y-major, if height > 2 blocks, then we need extra padding; - // in x or z major, it only occurs if width/depth is greater than 4 blocks - // Height is special, since we can enter the mip tail when height is 1/2 block high - int order_dim_limit = (order == 1) ? 2 : 4; - int& order_dim = (order == 0) ? surf_width : ((order == 1) ? surf_height : surf_depth); - if( mip_dim < 3 && order_dim > order_dim_limit && max_mip >= 3 ) mip_dim += 2; - else mip_dim += (mip_dim/2) + (mip_dim&1); - } - - int m; - for( m=0; m<mip; m++ ) { - if( in_tail ) { - get_meta_miptail_coord( x, y, z, mip-m, meta_blk_width_log2, meta_blk_height_log2, meta_blk_depth_log2 ); - m = mip; // break the loop - } else { - // Move either x, y, or z by the mip dimension based on which mip we're on and the order - if(m>=3 || m&1) { - switch(order) { - case 0: x += mip_width; break; - case 1: y += mip_height; break; - case 2: z += mip_depth; break; - } - } else { - switch(order) { - case 0: y += mip_height; break; - case 1: x += mip_width; break; - case 2: y += mip_height; break; - } - } - // Compute next mip's dimensions - mip_width = (mip_width/2); - mip_height = (mip_height/2); - mip_depth = (mip_depth/2); - // See if it's in the tail - in_tail = (mip_width <= (1<<meta_blk_width_log2)) && - (mip_height <= (1<<(meta_blk_height_log2-1))) && - (!is3d || (mip_depth <= (1<<meta_blk_depth_log2))); - // Pad out mip dimensions - mip_width = ((mip_width >> meta_blk_width_log2) + ((mip_width & ((1<<meta_blk_width_log2) -1)) != 0)) << meta_blk_width_log2; - mip_height = ((mip_height >> meta_blk_height_log2) + ((mip_height & ((1<<meta_blk_height_log2)-1)) != 0)) << meta_blk_height_log2; - mip_depth = ((mip_depth >> meta_blk_depth_log2) + ((mip_depth & ((1<<meta_blk_depth_log2) -1)) != 0)) << meta_blk_depth_log2; - } - } - } else { - // Take max of epitch and computed surf width - surf_width = (surf_width > epitch) ? surf_width : epitch; - } - - // Multiply the surface dimension by block size - surf_width = surf_width << meta_blk_width_log2; - surf_height = surf_height << meta_blk_height_log2; - surf_depth = surf_depth << meta_blk_depth_log2; - -} - -void -RB_MAP::get_meta_eq( CoordEq& metaaddr, - int max_mip, int num_ses_log2, int num_rbs_log2, - int &num_pipes_log2, - int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2, - int pipe_interleave_log2, - int xmode, - int data_type, - int meta_alignment, bool meta_linear) -{ - // Metaaddressing - Coordinate co; - CoordEq cur_rbeq, pipe_equation, orig_pipe_equation; - - bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR ); - bool is_color = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED ); - //bool is3d = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED ); - bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z ); - - bool is_fmask = (data_type == DATA_FMASK); - bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB); - bool is_rb_aligned = (meta_alignment == META_ALIGN_RB) || (meta_alignment == META_ALIGN_PIPE_RB); - - bool is_mipmapped = (max_mip > 0) ? true : false; - - int pipe_mask = 0x0; - int comp_frag_log2 = (is_color && (num_samples_log2 > max_comp_frag_log2)) ? max_comp_frag_log2 : num_samples_log2; - - int uncomp_frag_log2 = num_samples_log2 - comp_frag_log2; - - // Constraints on linear - if ( data_linear ) { - xmode = NONE; - num_samples_log2 = 0; - is_rb_aligned = false; - meta_linear = true; - } - if( meta_linear && !data_linear ) { - is_pipe_aligned = false; - } - - // Min metablock size if thick is 64KB, otherwise 4KB - int min_meta_block_size_log2 = (is_thick) ? 16 : 12; - - // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil - int metadata_word_size_log2 = (is_fmask) ? -1 : ((is_color) ? 0 : 2); - - int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2; - - // Get the total # of RB's before modifying due to rb align - int num_total_rbs_pre_rb_align_log2 = num_ses_log2 + num_rbs_log2; - - // Cap the pipe bits to block size - int num_ses_data_log2 = num_ses_log2; - cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2, - num_samples_log2, pipe_interleave_log2, block_size_log2, num_pipes_log2 ); - - // if not pipe aligned, set num_pipes_log2, num_ses_log2 to 0 - if( !is_pipe_aligned ) { - num_pipes_log2 = 0; - num_ses_data_log2 = 0; - } - - // Get the correct data address and rb equation - CoordEq dataaddr; - Get_Data_Offset_Equation( dataaddr, - (meta_linear) ? DATA_COLOR1D : data_type, - bpp_log2, num_samples_log2, block_size_log2 ); - - - // if not rb aligned, set num_ses_log2/rbs_log2 to 0; note, this is done after generating the data equation - if( !is_rb_aligned ) { - num_ses_log2 = 0; - num_rbs_log2 = 0; - } - - // Get pipe and rb equations - Get_Pipe_Equation( pipe_equation, dataaddr, pipe_interleave_log2, - num_pipes_log2, block_size_log2, num_samples_log2, xmode, data_type ); - - CoordEq& this_rbeq = rb_equation[num_ses_log2][num_rbs_log2]; - - num_pipes_log2 = pipe_equation.getsize(); - - if( meta_linear ) { - dataaddr.copy( metaaddr ); - if( data_linear ) { - if( is_pipe_aligned ) { - // Remove the pipe bits - metaaddr.shift( -num_pipes_log2, pipe_interleave_log2 ); - } - // Divide by comp block size, which for linear (which is always color) is 256 B - metaaddr.shift( -8 ); - if( is_pipe_aligned ) { - // Put pipe bits back in - metaaddr.shift( num_pipes_log2, pipe_interleave_log2 ); - int i; - for( i=0; i<num_pipes_log2; i++ ) { - pipe_equation[i].copyto(metaaddr[pipe_interleave_log2+i]); - } - } - } - metaaddr.shift( 1 ); - return; - } - - int i, j, k, old_size, new_size; - int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2; - - // For non-color surfaces, compessed block size is always 8x8; for color, it's always a 256 bytes sized region - int comp_blk_width_log2 = 3, comp_blk_height_log2 = 3, comp_blk_depth_log2 = 0; - int comp_blk_size_log2 = 8; - - // For color surfaces, compute the comp block width, height, and depth - // For non-color surfaces, compute the comp block size - if( is_color ) { - Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2, &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 ); - metadata_words_per_page_log2 -= num_samples_log2; // factor out num fragments for color surfaces - } - else { - comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2; - } - - // Compute meta block width and height - int num_comp_blks_per_meta_blk; - if (num_pipes_log2==0 && num_ses_log2==0 && num_rbs_log2==0) { - num_comp_blks_per_meta_blk = metadata_words_per_page_log2; - } - else { - num_comp_blks_per_meta_blk = num_total_rbs_pre_rb_align_log2 + ((is_thick) ? 18 : 10); - - if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > 27+bpp_log2) - num_comp_blks_per_meta_blk = 27+bpp_log2 - comp_blk_size_log2; - - if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk ) - num_comp_blks_per_meta_blk = metadata_words_per_page_log2; - } - - int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2; - Get_Meta_Block_Screen_Space( num_comp_blks_per_meta_blk, is_thick, is_mipmapped, // mipmaps should be y-biased - comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2, - meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2 ); - - // Make sure the metaaddr is cleared - metaaddr.resize(0); - metaaddr.resize(27); - - //------------------------------------------------------------------------------------------------------------------------ - // Use the growing square or growing cube order for thick as a starting point for the metadata address - //------------------------------------------------------------------------------------------------------------------------ - if( is_thick ) { - Coordinate cx( 'x', 0 ); - Coordinate cy( 'y', 0 ); - Coordinate cz( 'z', 0 ); - if(is_mipmapped) { - metaaddr.mort3d( cy, cx, cz ); - } else { - metaaddr.mort3d( cx, cy, cz ); - } - } - else { - Coordinate cx( 'x', 0 ); - Coordinate cy( 'y', 0 ); - Coordinate cs; - - if(is_mipmapped) { - metaaddr.mort2d( cy, cx, comp_frag_log2 ); - } else { - metaaddr.mort2d( cx, cy, comp_frag_log2 ); - } - - //------------------------------------------------------------------------------------------------------------------------ - // Put the compressible fragments at the lsb - // the uncompressible frags will be at the msb of the micro address - //------------------------------------------------------------------------------------------------------------------------ - int s; - for( s=0; s<comp_frag_log2; s++ ) { - cs.set( 's', s ); - metaaddr[s].add(cs); - } - } - - // Keep a copy of the pipe and rb equations - this_rbeq.copy( cur_rbeq ); - pipe_equation.copy( orig_pipe_equation ); - - // filter out everything under the compressed block size - co.set( 'x', comp_blk_width_log2 ); - metaaddr.Filter( '<', co, 0, 'x' ); - co.set( 'y', comp_blk_height_log2 ); - metaaddr.Filter( '<', co, 0, 'y' ); - co.set( 'z', comp_blk_depth_log2 ); - metaaddr.Filter( '<', co, 0, 'z' ); - // For non-color, filter out sample bits - if( !is_color ) { - co.set( 'x', 0 ); - metaaddr.Filter( '<', co, 0, 's' ); - } - - // filter out everything above the metablock size - co.set( 'x', meta_block_width_log2-1 ); - metaaddr.Filter( '>', co, 0, 'x' ); - co.set( 'y', meta_block_height_log2-1 ); - metaaddr.Filter( '>', co, 0, 'y' ); - co.set( 'z', meta_block_depth_log2-1 ); - metaaddr.Filter( '>', co, 0, 'z' ); - - // filter out everything above the metablock size for the channel bits - co.set( 'x', meta_block_width_log2-1 ); - pipe_equation.Filter( '>', co, 0, 'x' ); - co.set( 'y', meta_block_height_log2-1 ); - pipe_equation.Filter( '>', co, 0, 'y' ); - co.set( 'z', meta_block_depth_log2-1 ); - pipe_equation.Filter( '>', co, 0, 'z' ); - - // Make sure we still have the same number of channel bits - if( pipe_equation.getsize() != static_cast<UINT_32>(num_pipes_log2) ) { - // assert - } - - // Loop through all channel and rb bits, and make sure these components exist in the metadata address - for( i=0; i<num_pipes_log2; i++ ) { - for( j=pipe_equation[i].getsize()-1; j>=0; j-- ) { - if( !metaaddr.Exists( pipe_equation[i][j] ) ) { - // assert - } - } - } - for( i=0; i<num_total_rbs_log2; i++ ) { - for( j=cur_rbeq[i].getsize()-1; j>=0; j-- ) { - if( !metaaddr.Exists( cur_rbeq[i][j] ) ) { - // assert - } - } - } - - // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it - int old_rb_bits_left = num_total_rbs_log2; - for( i=0; i<num_total_rbs_log2; i++ ) { - for(j=0; j<num_pipes_log2; j++ ) { - if( cur_rbeq[i] == pipe_equation[j] ) { - cur_rbeq[i].Clear(); - old_rb_bits_left--; - // Mark which pipe bit caused the RB bit to be dropped - pipe_mask |= (1 << j); - } - } - } - - // Loop through each bit of the channel, get the smallest coordinate, and remove it from the metaaddr, and rb_equation - for( i=0; i<num_pipes_log2; i++ ) { - pipe_equation[i].getsmallest( co ); - - old_size = metaaddr.getsize(); - metaaddr.Filter( '=', co ); - new_size = metaaddr.getsize(); - if( new_size != old_size-1 ) { - // assert warning - } - pipe_equation.remove( co ); - for( j=0; j<num_total_rbs_log2; j++ ) { - if( cur_rbeq[j].remove( co ) ) { - // if we actually removed something from this bit, then add the remaining - // channel bits, as these can be removed for this bit - for( k=0; (unsigned)k<pipe_equation[i].getsize(); k++ ) { - if( pipe_equation[i][k] != co ) { - cur_rbeq[j].add( pipe_equation[i][k] ); - } - } - // if the rb bit is still empty, then we have to mark all pipe bits as affecting the RB - if( cur_rbeq[j].getsize() == 0 ) { - pipe_mask = (1 << num_pipes_log2) - 1; - } - } - } - } - - // Loop through the rb bits and see what remain; filter out the smallest coordinate if it remains - int rb_bits_left = 0; - for( i=0; i<num_total_rbs_log2; i++ ) { - if( cur_rbeq[i].getsize() > 0 ) { - rb_bits_left++; - cur_rbeq[i].getsmallest( co ); - old_size = metaaddr.getsize(); - metaaddr.Filter( '=', co ); - new_size = metaaddr.getsize(); - if( new_size != old_size-1 ) { - // assert warning - } - for( j=i+1; j<num_total_rbs_log2; j++ ) { - if( cur_rbeq[j].remove( co ) ) { - // if we actually removed something from this bit, then add the remaining - // rb bits, as these can be removed for this bit - for( k=0; (unsigned)k<cur_rbeq[i].getsize(); k++ ) { - if( cur_rbeq[i][k] != co ) { - cur_rbeq[j].add( cur_rbeq[i][k] ); - } - } - } - } - } - } - - // capture the size of the metaaddr - i = metaaddr.getsize(); - // resize to 49 bits...make this a nibble address - metaaddr.resize(49); - // Concatenate the macro address above the current address - for( j=0; i<49; i++, j++ ) { - co.set( 'm', j ); - metaaddr[i].add( co ); - } - - // Multiply by meta element size (in nibbles) - if( is_color ) { - metaaddr.shift( 1 ); // Byte size element - } else if( data_type == DATA_Z_STENCIL ) { - metaaddr.shift( 3 ); // 4 Byte size elements - } - - //------------------------------------------------------------------------------------------------------------------------ - // Note the pipe_interleave_log2+1 is because address is a nibble address - // Shift up from pipe interleave number of channel and rb bits left, and uncompressed fragments - //------------------------------------------------------------------------------------------------------------------------ - - metaaddr.shift( num_pipes_log2 + rb_bits_left + uncomp_frag_log2, - pipe_interleave_log2+1 ); - - // Put in the channel bits - for( i=0; i<num_pipes_log2; i++ ) { - orig_pipe_equation[i].copyto( metaaddr[pipe_interleave_log2+1 + i] ); - } - - // Put in remaining rb bits - i = 0; - for( j=0; j<rb_bits_left; i=(i+1) % num_total_rbs_log2 ) { - if( cur_rbeq[i].getsize() > 0 ) { - rb_equation[num_ses_log2][num_rbs_log2][i].copyto( metaaddr[pipe_interleave_log2+1 + num_pipes_log2 + j] ); - // Mark any rb bit we add in to the rb mask - j++; - } - } - - //------------------------------------------------------------------------------------------------------------------------ - // Put in the uncompressed fragment bits - //------------------------------------------------------------------------------------------------------------------------ - for( i=0; i<uncomp_frag_log2; i++ ) { - co.set( 's', comp_frag_log2+i ); - metaaddr[pipe_interleave_log2+1 + num_pipes_log2 + rb_bits_left + i].add( co ); - } - - - //------------------------------------------------------------------------------------------------------------------------ - // Check that the metadata SE bits match the data address - //------------------------------------------------------------------------------------------------------------------------ - for( i=0; i<num_ses_data_log2; i++ ) { - if(num_total_rbs_log2-num_ses_data_log2+i >= 0){ - if( metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] != dataaddr[ pipe_interleave_log2 + num_pipes_log2-num_ses_data_log2 + i ] || - metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] != rb_equation[num_ses_log2][num_rbs_log2][num_total_rbs_log2-num_ses_data_log2+i]) { - //FIXME: Removed to prevent logs from growing large in size // cout << "Warning: GPU bit " << i << " differs from data addr or RB equation on " << data_name << title << endl; - //FIXME: Removed to prevent logs from growing large in size // cout << " Data: " << dataaddr[ pipe_interleave_log2 + num_pipes_log2-num_ses_data_log2 + i ] << endl; - //FIXME: Removed to prevent logs from growing large in size // cout << "MData: " << metaaddr[ pipe_interleave_log2+1 + num_pipes_log2-num_ses_data_log2 + i ] << endl; - //FIXME: Removed to prevent logs from growing large in size // cout << " RBeq: " << rb_equation[num_ses_log2][num_rbs_log2][num_total_rbs_log2-num_ses_data_log2+i] << endl; - //FIXME: Removed to prevent logs from growing large in size // cout << " Pipe: " << orig_pipe_equation << endl; - //FIXME: Removed to prevent logs from growing large in size // cout << " DEq: " << dataaddr << endl; - } - } - } -} - -long -RB_MAP::get_meta_addr_calc( int x, int y, int z, int s, - long surf_base, int element_bytes_log2, int num_samples_log2, int max_comp_frag_log2, - long pitch, long slice, - int max_mip, - - //int swizzle_mode, - int xmode, int pipe_xor, int block_size_log2, - - /*int num_banks_log2,*/ - int num_pipes_log2, - int pipe_interleave_log2, - - int meta_alignment, - int dim_type, - int x_mip_org, int y_mip_org, int z_mip_org, - - int num_ses_log2, int num_rbs_log2, - /*bool se_affinity_enable, */ - - int data_type, - - int l2_metablk_w, int l2_metablk_h, int l2_metablk_d, - bool meta_linear - ) -{ - int bpp_log2 = element_bytes_log2; - int mip_base_x = x_mip_org; - int mip_base_y = y_mip_org; - int mip_base_z = z_mip_org; - - CoordEq metaaddr; - - //bool se_affinity_enable = false; - //int max_pipe_bytes = std::max(1<<num_pipes_log2 * 1<<pipe_interleave_log2, 1024 * 1<<log2_element_bytes); - //int max_banks_samples = std::max(1<<num_banks_log2, 1<<num_samples_log2); - //int block_size_log2 = max(4096, max_pipe_bytes * max_bank_samples * 1<<num_ses_log2); - - bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR ); - bool is_color = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED ); - bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z ); - bool is_fmask = (data_type == DATA_FMASK); - - bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB); - bool is_rb_aligned = (meta_alignment == META_ALIGN_RB) || (meta_alignment == META_ALIGN_PIPE_RB); - - if ( data_linear ) - meta_linear = true; - - if ( !data_linear && meta_linear) - max_mip = 0; - - // Min metablock size if thick is 64KB, otherwise 4KB - int min_meta_block_size_log2 = (is_thick) ? 16 : 12; - - // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil - int metadata_word_size_log2 = (is_fmask) ? -1 : ((is_color) ? 0 : 2); - int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2; - - int num_ses_data_log2 = num_ses_log2; - int block_size_data_log2 = block_size_log2; - int num_pipes_data_log2 = num_pipes_log2; - - //int num_banks_data_log2 = num_banks_log2; - cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2, num_samples_log2, pipe_interleave_log2, block_size_data_log2, num_pipes_data_log2/*, num_banks_data_log2 */); - - // Get the correct data address and rb equation - CoordEq dataaddr; - Get_Data_Offset_Equation( dataaddr, data_type, bpp_log2, num_samples_log2, block_size_data_log2 ); - - get_meta_eq( metaaddr, max_mip, num_ses_log2, num_rbs_log2, num_pipes_log2, /*num_banks_log2,*/ block_size_log2, - bpp_log2, num_samples_log2, max_comp_frag_log2, pipe_interleave_log2, xmode, - data_type, meta_alignment, meta_linear); - // For non-color surfaces, compessed block size is always 8x8; for color, it's always a 256 bytes sized region - int comp_blk_width_log2 = 3, comp_blk_height_log2 = 3, comp_blk_depth_log2 = 0; - int comp_blk_size_log2 = 8; - - if ( is_color ){ - Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2, &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 ); - metadata_words_per_page_log2 -= num_samples_log2; // factor out num fragments for color surfaces - } - else { - comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2; - } - - // Compute meta block width and height - int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2; - int num_comp_blks_per_meta_blk; - if((!is_pipe_aligned || num_pipes_log2==0) && (!is_rb_aligned || (num_ses_log2==0 && num_rbs_log2==0))) { - num_comp_blks_per_meta_blk = metadata_words_per_page_log2; - } - else { - num_comp_blks_per_meta_blk = num_total_rbs_log2 + ((is_thick) ? 18 : 10); - if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > 27+bpp_log2) num_comp_blks_per_meta_blk = 27+bpp_log2 - comp_blk_size_log2; - if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk ) - num_comp_blks_per_meta_blk = metadata_words_per_page_log2; - } - - int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2; - - //@@todo kr missing meta_block_width* - - // Get the data block size - int data_block_width_log2, data_block_height_log2, data_block_depth_log2; - - Get_Meta_Block_Screen_Space( block_size_log2 - comp_blk_size_log2, - is_thick, true, - comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2, - data_block_width_log2, data_block_height_log2, data_block_depth_log2 ); - - meta_block_width_log2 = l2_metablk_w; - meta_block_height_log2 = l2_metablk_h; - meta_block_depth_log2 = l2_metablk_d; - - int meta_x = mip_base_x + x ; - int meta_y = mip_base_y + y ; - int meta_z = mip_base_z + z ; - - if( meta_linear ){ - if(!data_linear) { - // Tiled data, linear metadata - meta_x = meta_x >> comp_blk_width_log2; - meta_y = meta_y >> comp_blk_height_log2; - meta_z = meta_z >> comp_blk_depth_log2; - pitch = pitch >> comp_blk_width_log2; - slice = slice >> (comp_blk_width_log2 + comp_blk_height_log2); - } - else{ - meta_x = meta_x << bpp_log2; - meta_y = meta_y << bpp_log2; - meta_z = meta_z << bpp_log2; - } - } - else{ - meta_x = meta_x >> meta_block_width_log2; - meta_y = meta_y >> meta_block_height_log2; - meta_z = meta_z >> meta_block_depth_log2; - - pitch = pitch >> meta_block_width_log2; - slice = slice >> (meta_block_width_log2 + meta_block_height_log2); - } - - long macroaddr = (long)meta_x + (long)meta_y*(long)pitch + (long)meta_z*(long)slice; - - int mip_tail_x, mip_tail_y, mip_tail_z; - mip_tail_x = mip_base_x & ((1 << meta_block_width_log2 )-1); - mip_tail_y = mip_base_y & ((1 << meta_block_height_log2)-1); - mip_tail_z = mip_base_z & ((1 << meta_block_depth_log2)-1); - - int mip_x = x + mip_tail_x; - int mip_y = y + mip_tail_y; - int mip_z = z + mip_tail_z; - - // the pipe_interleave_log2+1 is because we are dealing with nibble addresses - long pipe_xor_mask = (pipe_xor & ((1 << num_pipes_data_log2)-1)) << (pipe_interleave_log2+1); - - // shift surf_base to make it a nibble address - long meta_offset_from_base_nibble_address = metaaddr.solve( mip_x, mip_y, mip_z, s, macroaddr ); - - long address = (surf_base << 1) + (meta_offset_from_base_nibble_address ^ pipe_xor_mask); - - return address; -} - -#if 0 -long -RB_MAP::get_meta_addr( int x, int y, int z, int s, int mip, - int surf_width, int surf_height, int surf_depth, int lpitch, - long surf_base, int pipe_xor, int max_mip, - int num_ses_log2, int num_rbs_log2, int num_pipes_log2, - int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2, - int pipe_interleave_log2, int xmode, int data_type, int meta_alignment, bool meta_linear) -{ - CoordEq metaaddr; - - bool data_linear = ( data_type == DATA_COLOR1D || data_type == DATA_COLOR2D_LINEAR ); - bool is_color = ( data_linear || data_type == DATA_COLOR2D || data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z || data_type == DATA_COLOR3D_D_NOT_USED ); - bool is_thick = ( data_type == DATA_COLOR3D_S || data_type == DATA_COLOR3D_Z ); - bool is_fmask = (data_type == DATA_FMASK); - - bool is_pipe_aligned = (meta_alignment == META_ALIGN_PIPE) || (meta_alignment == META_ALIGN_PIPE_RB); - bool is_rb_aligned = (meta_alignment == META_ALIGN_RB) || (meta_alignment == META_ALIGN_PIPE_RB); - - bool is_mipmapped = (max_mip > 0) ? true : false; - - if( data_linear ) meta_linear = true; - // Don't allow mipmapping on the tiled data, meta linear case - // or if we have linear 2d/3d surface - - #ifdef ADDRESS__LPITCH_DISABLE__0 - if( (!data_linear && meta_linear) || (data_type == DATA_COLOR2D_LINEAR) ) max_mip = 0; - #else - if( !data_linear && meta_linear) max_mip = 0; - #endif - - // Min metablock size if thick is 64KB, otherwise 4KB - int min_meta_block_size_log2 = (is_thick) ? 16 : 12; - - - // metadata word size is 1/2 byte for cmask, 1 byte for color, and 4 bytes for z/stencil - int metadata_word_size_log2 = (is_fmask) ? -1 : ((is_color) ? 0 : 2); - int metadata_words_per_page_log2 = min_meta_block_size_log2 - metadata_word_size_log2; - - // Cap the pipe bits to block size - int num_ses_data_log2 = num_ses_log2; - int block_size_data_log2 = block_size_log2; - int num_pipes_data_log2 = num_pipes_log2; - - cap_pipe( xmode, is_thick, num_ses_data_log2, bpp_log2, num_samples_log2, pipe_interleave_log2, block_size_data_log2, num_pipes_data_log2 ); - - // Get the correct data address and rb equation - CoordEq dataaddr; - Get_Data_Offset_Equation( dataaddr, data_type, bpp_log2, num_samples_log2, block_size_data_log2 ); - - get_meta_eq( metaaddr, max_mip, num_ses_log2, num_rbs_log2, num_pipes_log2, block_size_log2, - bpp_log2, num_samples_log2, max_comp_frag_log2, pipe_interleave_log2, xmode, data_type, - meta_alignment, meta_linear); - - // For non-color surfaces, compessed block size is always 8x8; for color, it's always a 256 bytes sized region - int comp_blk_width_log2 = 3, comp_blk_height_log2 = 3, comp_blk_depth_log2 = 0; - int comp_blk_size_log2 = 8; - - if ( is_color ) { - Get_Comp_Block_Screen_Space( dataaddr, comp_blk_size_log2, &comp_blk_width_log2, &comp_blk_height_log2, &comp_blk_depth_log2 ); - metadata_words_per_page_log2 -= num_samples_log2; // factor out num fragments for color surfaces - } else { - comp_blk_size_log2 = 6 + num_samples_log2 + bpp_log2; - } - - // Compute meta block width and height - int num_total_rbs_log2 = num_ses_log2 + num_rbs_log2; - - int num_comp_blks_per_meta_blk; - if((!is_pipe_aligned || num_pipes_log2==0) && (!is_rb_aligned || (num_ses_log2==0 && num_rbs_log2==0))) { - num_comp_blks_per_meta_blk = metadata_words_per_page_log2; - } - else { - num_comp_blks_per_meta_blk = num_total_rbs_log2 + ((is_thick) ? 18 : 10); - - if( num_comp_blks_per_meta_blk + comp_blk_size_log2 > 27+bpp_log2) num_comp_blks_per_meta_blk = 27+bpp_log2 - comp_blk_size_log2; - - if( metadata_words_per_page_log2 > num_comp_blks_per_meta_blk ) - num_comp_blks_per_meta_blk = metadata_words_per_page_log2; - } - - int meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2; - - - Get_Meta_Block_Screen_Space( num_comp_blks_per_meta_blk, is_thick, is_mipmapped, - comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2, - meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2 ); - - // Get the data block size - int data_block_width_log2, data_block_height_log2, data_block_depth_log2; - - Get_Meta_Block_Screen_Space( block_size_log2 - comp_blk_size_log2, is_thick, true, - comp_blk_width_log2, comp_blk_height_log2, comp_blk_depth_log2, - data_block_width_log2, data_block_height_log2, data_block_depth_log2 ); - - int meta_x, meta_y, meta_z; - int meta_surf_width = surf_width; - int meta_surf_height = surf_height; - int meta_surf_depth = surf_depth; - - int mip_base_x=0, mip_base_y=0, mip_base_z=0; - get_mip_coord( mip_base_x, mip_base_y, mip_base_z, mip, - meta_block_width_log2, meta_block_height_log2, meta_block_depth_log2, - data_block_width_log2, data_block_height_log2, - meta_surf_width, meta_surf_height, meta_surf_depth, lpitch, max_mip, - data_type, bpp_log2, meta_linear ); - - meta_x = mip_base_x + x; - meta_y = mip_base_y + y; - meta_z = mip_base_z + z; - - if( meta_linear ) { - if( !data_linear ) { - // Tiled data, linear metadata - meta_x = meta_x >> comp_blk_width_log2; - meta_y = meta_y >> comp_blk_height_log2; - meta_z = meta_z >> comp_blk_depth_log2; - meta_surf_width = meta_surf_width >> comp_blk_width_log2; - meta_surf_height = meta_surf_height >> comp_blk_height_log2; - } - else{ - meta_x = meta_x << bpp_log2; - meta_y = meta_y << bpp_log2; - meta_z = meta_z << bpp_log2; - } - } else { - meta_x = meta_x >> meta_block_width_log2; - meta_y = meta_y >> meta_block_height_log2; - meta_z = meta_z >> meta_block_depth_log2; - meta_surf_width = meta_surf_width >> meta_block_width_log2; - meta_surf_height = meta_surf_height >> meta_block_height_log2; - } - - long macroaddr = (long)meta_x + (long)meta_y*(long)meta_surf_width + (long)meta_z*(long)meta_surf_width*(long)meta_surf_height; - - int mip_tail_x, mip_tail_y, mip_tail_z; - mip_tail_x = mip_base_x & ((1 << meta_block_width_log2 )-1); - mip_tail_y = mip_base_y & ((1 << meta_block_height_log2)-1); - mip_tail_z = mip_base_z & ((1 << meta_block_depth_log2)-1); - - int mip_x = x + mip_tail_x; - int mip_y = y + mip_tail_y; - int mip_z = z + mip_tail_z; - - // the pipe_interleave_log2+1 is because we are dealing with nibble addresses - long pipe_xor_mask = (pipe_xor & ((1 << num_pipes_data_log2)-1)) << (pipe_interleave_log2+1); - - // shift surf_base to make it a nibble address - long address = (surf_base << 1) + (metaaddr.solve( mip_x, mip_y, mip_z, s, macroaddr ) ^ pipe_xor_mask); - - return address; -} -#endif - -void -RB_MAP::Initialize() -{ - int num_se_log2, num_rb_per_se_log2; - for( num_se_log2=0; num_se_log2<5; num_se_log2++ ) { - for( num_rb_per_se_log2=0; num_rb_per_se_log2<3; num_rb_per_se_log2++ ) { - Get_RB_Equation( rb_equation[num_se_log2][num_rb_per_se_log2], num_se_log2, num_rb_per_se_log2 ); - } - } - - int pix_size_log2, num_samples_log2; - for( pix_size_log2=0; pix_size_log2<4; pix_size_log2++ ) { - for( num_samples_log2=0; num_samples_log2<4; num_samples_log2++ ) { - Get_Data_Offset_Equation( zaddr[pix_size_log2][num_samples_log2], DATA_Z_STENCIL, pix_size_log2, num_samples_log2, 16 ); - } - } - - for( pix_size_log2=0; pix_size_log2<5; pix_size_log2++ ) { - for( num_samples_log2=0; num_samples_log2<4; num_samples_log2++ ) { - Get_Data_Offset_Equation( caddr[pix_size_log2][num_samples_log2], DATA_COLOR2D, pix_size_log2, num_samples_log2, 16 ); - } - } - - for( pix_size_log2=0; pix_size_log2<5; pix_size_log2++ ) { - Get_Data_Offset_Equation( c3addr[pix_size_log2][0], DATA_COLOR3D_S, pix_size_log2, 0, 16 ); - Get_Data_Offset_Equation( c3addr[pix_size_log2][1], DATA_COLOR3D_Z, pix_size_log2, 0, 16 ); - } -} - diff --git a/src/amd/addrlib/gfx9/rbmap.h b/src/amd/addrlib/gfx9/rbmap.h deleted file mode 100644 index 89c8922d3fe..00000000000 --- a/src/amd/addrlib/gfx9/rbmap.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -// This class RB_MAP contains the top-level calculation functions which are used to generate rb id map based rb id equations - -#ifndef __RB_MAP_H -#define __RB_MAP_H - -#include "coord.h" - -class RB_MAP -{ -public: - - enum MAX_VALUES { - MAX_SES_LOG2 = 3, - MAX_RBS_LOG2 = 2 - }; - - enum COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2 { - COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2_2D = 10, - COMPRESSED_DATABLOCKS_IN_METABLOCK_PER_RB_LOG2_3D = 18 - }; - - RB_MAP(void); - - void Get_Comp_Block_Screen_Space( CoordEq& addr, int bytes_log2, int* w, int* h, int* d = NULL); - - void Get_Meta_Block_Screen_Space( int num_comp_blocks_log2, bool is_thick, bool y_biased, - int comp_block_width_log2, int comp_block_height_log2, int comp_block_depth_log2, - int& meta_block_width_log2, int& meta_block_height_log2, int& meta_block_depth_log2 ); - void cap_pipe( int xmode, bool is_thick, int& num_ses_log2, int bpp_log2, int num_samples_log2, int pipe_interleave_log2, - int& block_size_log2, int& num_pipes_log2 ); - - void Get_Data_Offset_Equation( CoordEq& data_eq, int data_type, int bpp_log2, int num_samples_log2, int block_size_log2 ); - - void Get_RB_Equation( CoordEq& rb_equation, int num_ses_log2, int num_rbs_log2 ); - - void Get_Pipe_Equation( CoordEq& pipe_equation, CoordEq& addr, - int pipe_interleave_log2, - int num_pipes_log2, - int block_size_log2, - int num_samples_log2, - int xmode, int data_type - ); - - void get_meta_miptail_coord( int& x, int& y, int& z, int mip_in_tail, int blk_width_log2, int blk_height_log2, int blk_depth_log2 ); - - void get_mip_coord( int& x, int& y, int& z, int mip, - int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2, - int data_blk_width_log2, int data_blk_height_log2, - int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip, - int data_type, int bpp_log2, bool meta_linear ); - - void get_mip_coord_linear( int& x, int& y, int& z, int mip, int data_blk_width_log2, int data_blk_height_log2, - int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip, int data_type, int bpp_log2 ); - - void get_mip_coord_nonlinear( int& x, int& y, int& z, int mip, int meta_blk_width_log2, int meta_blk_height_log2, int meta_blk_depth_log2, - int& surf_width, int& surf_height, int& surf_depth, int epitch, int max_mip, int data_type ); - - void get_meta_eq( CoordEq& metaaddr, int max_mip, int num_ses_log2, int num_rbs_log2, int &num_pipes_log2, - int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2, - int pipe_interleave_log2, int xmode, int data_type, int meta_alignment, bool meta_linear); - -#if 0 - long get_meta_addr( int x, int y, int z, int s, int mip, - int surf_width, int surf_height, int surf_depth, int epitch, - long surf_base, int pipe_xor, int max_mip, - int num_ses_log2, int num_rbs_log2, int num_pipes_log2, - int block_size_log2, int bpp_log2, int num_samples_log2, int max_comp_frag_log2, - int pipe_interleave_log2, int xmode, int data_type, int meta_alignment, bool meta_linear); -#endif - - long get_meta_addr_calc( int x, int y, int z, int s, - long surf_base, int element_bytes_log2, int num_samples_log2, int max_comp_frag_log2, - long pitch, long slice, - int max_mip, - //int swizzle_mode, - int xmode, int pipe_xor, int block_size_log2, - /*int num_banks_log2,*/ int num_pipes_log2, - int pipe_interleave_log2, int meta_alignment, int dim_type, int x_mip_org, int y_mip_org, - int z_mip_org, int num_ses_log2, int num_rbs_log2, /*bool se_affinity_enable,*/ int data_type, - int l2_metablk_w, int l2_metablk_h, int l2_metablk_d, bool meta_linear); - - void Initialize(void); - -public: - enum XOR_RANGE { - NONE = 0, - XOR = 1, - PRT = 2 - }; - - - enum DATA_TYPE_ENUM { - DATA_COLOR1D, - DATA_COLOR2D, - DATA_COLOR3D_S, - DATA_COLOR3D_Z, - DATA_Z_STENCIL, - DATA_FMASK, - DATA_COLOR2D_LINEAR, - DATA_COLOR3D_D_NOT_USED // should not be used; use COLOR2D instead - }; - - enum META_ALIGNMENT { - META_ALIGN_NONE, - META_ALIGN_PIPE, - META_ALIGN_RB, - META_ALIGN_PIPE_RB - }; - - CoordEq rb_equation[MAX_SES_LOG2+1][MAX_RBS_LOG2+1]; - CoordEq zaddr [4][4]; - CoordEq caddr [5][4]; - CoordEq c3addr[5][2]; -}; - -#endif diff --git a/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h b/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h index 823710cc189..49cc65a7dcb 100644 --- a/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h +++ b/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h @@ -27,6 +27,14 @@ * of the Software. */ +#include "util/u_endian.h" + +#if defined(PIPE_ARCH_LITTLE_ENDIAN) +#define LITTLEENDIAN_CPU +#elif defined(PIPE_ARCH_BIG_ENDIAN) +#define BIGENDIAN_CPU +#endif + // // Make sure the necessary endian defines are there. // diff --git a/src/amd/addrlib/inc/chip/r800/si_gb_reg.h b/src/amd/addrlib/inc/chip/r800/si_gb_reg.h index cf67f602bdf..793edbc6280 100644 --- a/src/amd/addrlib/inc/chip/r800/si_gb_reg.h +++ b/src/amd/addrlib/inc/chip/r800/si_gb_reg.h @@ -27,6 +27,14 @@ * of the Software. */ +#include "util/u_endian.h" + +#if defined(PIPE_ARCH_LITTLE_ENDIAN) +#define LITTLEENDIAN_CPU +#elif defined(PIPE_ARCH_BIG_ENDIAN) +#define BIGENDIAN_CPU +#endif + // // Make sure the necessary endian defines are there. // diff --git a/src/amd/addrlib/inc/lnx_common_defs.h b/src/amd/addrlib/inc/lnx_common_defs.h deleted file mode 100644 index 61540f49b7e..00000000000 --- a/src/amd/addrlib/inc/lnx_common_defs.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ -#ifndef _lnx_common_defs_h_ -#define _lnx_common_defs_h_ - -#if DBG -#include <stdarg.h> // We do not have any choice: need variable - // number of parameters support for debug - // build. -#endif // #if DBG - -// -// -------------- External functions from Linux kernel driver ---------------- -// -// Note: The definitions/declararions below must match the original ones. - -#ifdef __cplusplus -extern "C" { -#endif - -typedef unsigned long __ke_size_t; // as it is defined in firegl_public.h -typedef int __kernel_ptrdiff_t; // as it is defined in posix_types.h - - -#if !defined(ATI_API_CALL) -#define ATI_API_CALL __attribute__((regparm(0))) -#endif - -extern void * ATI_API_CALL __ke_memset(void* s, int c, __ke_size_t count); -extern void * ATI_API_CALL __ke_memcpy(void* d, const void* s, __ke_size_t count); -extern ATI_API_CALL __ke_size_t __ke_strlen(const char *s); -extern char* ATI_API_CALL __ke_strcpy(char* d, const char* s); -extern char* ATI_API_CALL __ke_strncpy(char* d, const char* s, __ke_size_t count); -extern void __ke_printk(const char* fmt, ...); - -extern int ATI_API_CALL __ke_snprintf(char* buf, __ke_size_t size, const char* fmt, ...); -extern int ATI_API_CALL KCL_CopyFromUserSpace(void* to, const void* from, __ke_size_t size); -extern int ATI_API_CALL KCL_CopyToUserSpace(void* to, const void* from, __ke_size_t size); -#define __ke_copy_from_user KCL_CopyFromUserSpace -#define __ke_copy_to_user KCL_CopyToUserSpace -extern int ATI_API_CALL __ke_verify_area(int type, const void * addr, unsigned long size); - -extern unsigned long ATI_API_CALL KAS_GetTickCounter(void); -extern unsigned long ATI_API_CALL KAS_GetTicksPerSecond(void); - - -#if DBG -extern int ATI_API_CALL __ke_vsnprintf(char *buf, __ke_size_t size, const char *fmt, va_list ap); -#define vsnprintf(_dst, _size, _fmt, varg) __ke_snprintf(_dst, _size, _fmt, varg) -#endif // #if DBG - - -// Note: This function is not defined in firegl_public.h. -void firegl_hardwareHangRecovery(void); - -#ifdef __cplusplus -} -#endif - -// -// -------------------------- C/C++ standard typedefs ---------------------------- -// -#ifdef __SIZE_TYPE__ -typedef __SIZE_TYPE__ size_t; -#else // #ifdef __SIZE_TYPE__ -typedef unsigned int size_t; -#endif // #ifdef __SIZE_TYPE__ - -#ifdef __PTRDIFF_TYPE__ -typedef __PTRDIFF_TYPE__ ptrdiff_t; -#else // #ifdef __PTRDIFF_TYPE__ -typedef int ptrdiff_t; -#endif // #ifdef __PTRDIFF_TYPE__ - -#ifndef NULL -#ifdef __cplusplus -#define NULL __null -#else -#define NULL ((void *)0) -#endif -#endif - - -// -// ------------------------- C/C++ standard macros --------------------------- -// - -#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) // as it is defined in stddef.h -#define CHAR_BIT 8 // as it is defined in limits.h - -// -// --------------------------------- C RTL ----------------------------------- -// - -#define memset(_p, _v, _n) __ke_memset(_p, _v, _n) -#define memcpy(_d, _s, _n) __ke_memcpy(_d, _s, _n) -#define strlen(_s) __ke_strlen(_s) -#define strcpy(_d, _s) __ke_strcpy(_d, _s) -#define strncpy(_d, _s, _n) __ke_strncpy(_d, _s, _n) -// Note: C99 supports macros with variable number of arguments. GCC also supports this C99 feature as -// C++ extension. -#define snprintf(_dst, _size, _fmt, arg...) __ke_snprintf(_dst, _size, _fmt, ##arg) - - -#endif // #ifdef _lnx_common_defs_h_ - diff --git a/src/amd/addrlib/meson.build b/src/amd/addrlib/meson.build index a6cad1207b0..62beb0ecbc1 100644 --- a/src/amd/addrlib/meson.build +++ b/src/amd/addrlib/meson.build @@ -38,11 +38,9 @@ files_addrlib = files( 'gfx9/coord.h', 'gfx9/gfx9addrlib.cpp', 'gfx9/gfx9addrlib.h', - 'gfx9/rbmap.cpp', - 'gfx9/rbmap.h', + 'amdgpu_asic_addr.h', 'inc/chip/gfx9/gfx9_gb_reg.h', 'inc/chip/r800/si_gb_reg.h', - 'inc/lnx_common_defs.h', 'r800/chip/si_ci_vi_merged_enum.h', 'r800/ciaddrlib.cpp', 'r800/ciaddrlib.h', diff --git a/src/amd/addrlib/r800/ciaddrlib.cpp b/src/amd/addrlib/r800/ciaddrlib.cpp index 4f67350c82f..322dcf64ffd 100644 --- a/src/amd/addrlib/r800/ciaddrlib.cpp +++ b/src/amd/addrlib/r800/ciaddrlib.cpp @@ -35,15 +35,7 @@ #include "si_gb_reg.h" -#include "si_ci_vi_merged_enum.h" - -#if BRAHMA_BUILD -#include "amdgpu_id.h" -#else -#include "ci_id.h" -#include "kv_id.h" -#include "vi_id.h" -#endif +#include "amdgpu_asic_addr.h" //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -188,7 +180,6 @@ CiLib::CiLib(const Client* pClient) m_allowNonDispThickModes(FALSE) { m_class = CI_ADDRLIB; - memset(&m_settings, 0, sizeof(m_settings)); } /** @@ -450,7 +441,6 @@ BOOL_32 CiLib::HwlInitGlobalParams( // read the correct pipes from tile mode table if (m_settings.isHawaii) { - // Hawaii has 16-pipe, see GFXIP_Config_Summary.xls m_pipes = 16; } else if (m_settings.isBonaire || m_settings.isSpectre) @@ -600,9 +590,9 @@ INT_32 CiLib::HwlPostCheckTileIndex( **************************************************************************************************** */ ADDR_E_RETURNCODE CiLib::HwlSetupTileCfg( - UINT_32 bpp, ///< [in] Bits per pixel - INT_32 index, ///< [in] Tile index - INT_32 macroModeIndex, ///< [in] Index in macro tile mode table(CI) + UINT_32 bpp, ///< Bits per pixel + INT_32 index, ///< Tile index + INT_32 macroModeIndex, ///< Index in macro tile mode table(CI) ADDR_TILEINFO* pInfo, ///< [out] Tile Info AddrTileMode* pMode, ///< [out] Tile mode AddrTileType* pType ///< [out] Tile type @@ -711,13 +701,12 @@ ADDR_E_RETURNCODE CiLib::HwlComputeSurfaceInfo( ADDR_E_RETURNCODE retCode = SiLib::HwlComputeSurfaceInfo(pIn, pOut); - if ((pIn->mipLevel > 0) && (pOut->tcCompatible == TRUE) && (pOut->tileMode != pIn->tileMode) && (m_settings.isVolcanicIslands == TRUE)) { - CheckTcCompatibility(pOut->pTileInfo, pIn->bpp, pOut->tileMode, pOut->tileType, pOut); + pOut->tcCompatible = CheckTcCompatibility(pOut->pTileInfo, pIn->bpp, pOut->tileMode, pOut->tileType, pOut); } if (pOut->macroModeIndex == TileIndexNoMacroIndex) @@ -1572,7 +1561,7 @@ VOID CiLib::HwlSetupTileInfo( if (flags.tcCompatible) { - CheckTcCompatibility(pTileInfo, bpp, tileMode, inTileType, pOut); + flags.tcCompatible = CheckTcCompatibility(pTileInfo, bpp, tileMode, inTileType, pOut); } pOut->tcCompatible = flags.tcCompatible; @@ -2271,19 +2260,21 @@ BOOL_32 CiLib::DepthStencilTileCfgMatch( * CiLib::DepthStencilTileCfgMatch * * @brief -* Turn off TcCompatible if requirement is not met +* Check if tc compatibility is available * @return -* N/A +* If tc compatibility is not available **************************************************************************************************** */ -VOID CiLib::CheckTcCompatibility( - const ADDR_TILEINFO* pTileInfo, ///< [in] input tile info - UINT_32 bpp, ///< [in] Bits per pixel - AddrTileMode tileMode, ///< [in] input tile mode - AddrTileType tileType, ///< [in] input tile type - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] out structure +BOOL_32 CiLib::CheckTcCompatibility( + const ADDR_TILEINFO* pTileInfo, ///< [in] input tile info + UINT_32 bpp, ///< [in] Bits per pixel + AddrTileMode tileMode, ///< [in] input tile mode + AddrTileType tileType, ///< [in] input tile type + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in] output surf info ) const { + BOOL_32 tcCompatible = TRUE; + if (IsMacroTiled(tileMode)) { if (tileType != ADDR_DEPTH_SAMPLE_ORDER) @@ -2309,7 +2300,7 @@ VOID CiLib::CheckTcCompatibility( if (m_rowSize < colorTileSplit) { - pOut->tcCompatible = FALSE; + tcCompatible = FALSE; } } } @@ -2317,8 +2308,10 @@ VOID CiLib::CheckTcCompatibility( else { // Client should not enable tc compatible for linear and 1D tile modes. - pOut->tcCompatible = FALSE; + tcCompatible = FALSE; } + + return tcCompatible; } } // V1 diff --git a/src/amd/addrlib/r800/ciaddrlib.h b/src/amd/addrlib/r800/ciaddrlib.h index 3c838dfc53c..c11b678574f 100644 --- a/src/amd/addrlib/r800/ciaddrlib.h +++ b/src/amd/addrlib/r800/ciaddrlib.h @@ -44,37 +44,6 @@ namespace V1 /** **************************************************************************************************** -* @brief CI specific settings structure. -**************************************************************************************************** -*/ -struct CIChipSettings -{ - struct - { - UINT_32 isSeaIsland : 1; - UINT_32 isBonaire : 1; - UINT_32 isKaveri : 1; - UINT_32 isSpectre : 1; - UINT_32 isSpooky : 1; - UINT_32 isKalindi : 1; - // Hawaii is GFXIP 7.2 - UINT_32 isHawaii : 1; - - // VI - UINT_32 isVolcanicIslands : 1; - UINT_32 isIceland : 1; - UINT_32 isTonga : 1; - UINT_32 isFiji : 1; - UINT_32 isPolaris10 : 1; - UINT_32 isPolaris11 : 1; - UINT_32 isPolaris12 : 1; - // VI fusion (Carrizo) - UINT_32 isCarrizo : 1; - }; -}; - -/** -**************************************************************************************************** * @brief This class is the CI specific address library * function set. **************************************************************************************************** @@ -208,9 +177,8 @@ private: const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - VOID CheckTcCompatibility( - const ADDR_TILEINFO* pTileInfo, UINT_32 bpp, AddrTileMode tileMode, - AddrTileType tileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + BOOL_32 CheckTcCompatibility(const ADDR_TILEINFO* pTileInfo, UINT_32 bpp, AddrTileMode tileMode, + AddrTileType tileType, const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; static const UINT_32 MacroTileTableSize = 16; static const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2; @@ -221,8 +189,6 @@ private: ADDR_TILEINFO m_macroTileTable[MacroTileTableSize]; UINT_32 m_noOfMacroEntries; BOOL_32 m_allowNonDispThickModes; - - CIChipSettings m_settings; }; } // V1 diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp index 7affdecbf02..99aa6cf4cdb 100644 --- a/src/amd/addrlib/r800/egbaddrlib.cpp +++ b/src/amd/addrlib/r800/egbaddrlib.cpp @@ -739,13 +739,12 @@ BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMicroTiled( AdjustPitchAlignment(flags, pPitchAlign); - // ECR#393489 - // Workaround 2 for 1D tiling - There is HW bug for Carrizo + // Workaround 2 for 1D tiling - There is HW bug for Carrizo, // where it requires the following alignments for 1D tiling. if (flags.czDispCompatible && (mipLevel == 0)) { *pBaseAlign = PowTwoAlign(*pBaseAlign, 4096); //Base address MOD 4096 = 0 - *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0 + *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0 } // end Carrizo workaround for 1D tilling @@ -1091,6 +1090,8 @@ AddrTileMode EgBasedLib::ComputeSurfaceMipLevelTileMode( ADDR_TILEINFO* pTileInfo ///< [in] ptr to bank structure ) const { + UINT_64 bytesPerSlice; + (void)bytesPerSlice; UINT_32 bytesPerTile; AddrTileMode expTileMode = baseTileMode; @@ -1100,6 +1101,7 @@ AddrTileMode EgBasedLib::ComputeSurfaceMipLevelTileMode( // // Compute the size of a slice. // + bytesPerSlice = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp * numSamples); bytesPerTile = BITS_TO_BYTES(MicroTilePixels * microTileThickness * NextPow2(bpp) * numSamples); // @@ -1329,12 +1331,6 @@ UINT_64 EgBasedLib::DispatchComputeSurfaceAddrFromCoord( UINT_32* pBitPosition = &pOut->bitPosition; UINT_64 addr; -#if ADDR_AM_BUILD - UINT_32 addr5Bit = 0; - UINT_32 addr5Swizzle = pIn->addr5Swizzle; - BOOL_32 is32ByteTile = pIn->is32ByteTile; -#endif - // ADDR_DEPTH_SAMPLE_ORDER = non-disp + depth-sample-order if (microTileType == ADDR_DEPTH_SAMPLE_ORDER) { @@ -1439,23 +1435,6 @@ UINT_64 EgBasedLib::DispatchComputeSurfaceAddrFromCoord( break; } -#if ADDR_AM_BUILD - if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) - { - if (addr5Swizzle && isDepthSampleOrder && is32ByteTile) - { - UINT_32 tx = x >> 3; - UINT_32 ty = y >> 3; - UINT_32 tileBits = ((ty&0x3) << 2) | (tx&0x3); - - tileBits = tileBits & addr5Swizzle; - addr5Bit = XorReduce(tileBits, 4); - - addr = addr | static_cast<UINT_64>(addr5Bit << 5); - } - } -#endif - return addr; } @@ -2751,6 +2730,8 @@ ADDR_E_RETURNCODE EgBasedLib::HwlComputeBaseSwizzle( { 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 }, // ADDR_SURF_16_BANK }; + UINT_32 pipes = HwlGetPipes(pTileInfo); + (void)pipes; UINT_32 banks = pTileInfo ? pTileInfo->banks : 2; UINT_32 hwNumBanks; @@ -3379,20 +3360,6 @@ ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskAddrFromCoord( { ADDR_E_RETURNCODE retCode = ADDR_OK; -#if ADDR_AM_BUILD - if ((pIn->x > pIn->pitch) || - (pIn->y > pIn->height) || - (pIn->numSamples > m_maxSamples) || - (pIn->sample >= m_maxSamples)) - { - retCode = ADDR_INVALIDPARAMS; - } - else - { - pOut->addr = DispatchComputeFmaskAddrFromCoord(pIn, pOut); - } -#endif - return retCode; } @@ -3412,618 +3379,9 @@ ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskCoordFromAddr( { ADDR_E_RETURNCODE retCode = ADDR_OK; -#if ADDR_AM_BUILD - if ((pIn->bitPosition >= 8) || - (pIn->numSamples > m_maxSamples)) - { - retCode = ADDR_INVALIDPARAMS; - } - else - { - DispatchComputeFmaskCoordFromAddr(pIn, pOut); - } -#endif - return retCode; } -#if ADDR_AM_BUILD -/** -**************************************************************************************************** -* EgBasedLib::DispatchComputeFmaskAddrFromCoord -* -* @brief -* Computes the FMASK address and bit position from a coordinate. -* @return -* The byte address -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::DispatchComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - UINT_32 x = pIn->x; - UINT_32 y = pIn->y; - UINT_32 slice = pIn->slice; - UINT_32 sample = pIn->sample; - UINT_32 plane = pIn->plane; - UINT_32 pitch = pIn->pitch; - UINT_32 height = pIn->height; - UINT_32 numSamples = pIn->numSamples; - AddrTileMode tileMode = pIn->tileMode; - BOOL_32 ignoreSE = pIn->ignoreSE; - ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; - BOOL_32 resolved = pIn->resolved; - - UINT_32* pBitPosition = &pOut->bitPosition; - UINT_64 addr = 0; - - ADDR_ASSERT(numSamples > 1); - ADDR_ASSERT(Thickness(tileMode) == 1); - - switch (tileMode) - { - case ADDR_TM_1D_TILED_THIN1: - addr = ComputeFmaskAddrFromCoordMicroTiled(x, - y, - slice, - sample, - plane, - pitch, - height, - numSamples, - tileMode, - resolved, - pBitPosition); - break; - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THIN1: - UINT_32 pipeSwizzle; - UINT_32 bankSwizzle; - - if (m_configFlags.useCombinedSwizzle) - { - ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo, - &bankSwizzle, &pipeSwizzle); - } - else - { - pipeSwizzle = pIn->pipeSwizzle; - bankSwizzle = pIn->bankSwizzle; - } - - addr = ComputeFmaskAddrFromCoordMacroTiled(x, - y, - slice, - sample, - plane, - pitch, - height, - numSamples, - tileMode, - pipeSwizzle, - bankSwizzle, - ignoreSE, - pTileInfo, - resolved, - pBitPosition); - break; - default: - *pBitPosition = 0; - break; - } - - return addr; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeFmaskAddrFromCoordMicroTiled -* -* @brief -* Computes the FMASK address and bit position from a coordinate for 1D tilied (micro -* tiled) -* @return -* The byte address -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::ComputeFmaskAddrFromCoordMicroTiled( - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - UINT_32 sample, ///< [in] sample number - UINT_32 plane, ///< [in] plane number - UINT_32 pitch, ///< [in] surface pitch in pixels - UINT_32 height, ///< [in] surface height in pixels - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - BOOL_32 resolved, ///< [in] TRUE if this is for resolved fmask - UINT_32* pBitPosition ///< [out] pointer to returned bit position - ) const -{ - UINT_64 addr = 0; - UINT_32 effectiveBpp; - UINT_32 effectiveSamples; - - // - // 2xAA use the same layout as 4xAA - // - if (numSamples == 2) - { - numSamples = 4; - } - - // - // Compute the number of planes. - // - if (resolved == FALSE) - { - effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples); - effectiveBpp = numSamples; - - // - // Compute the address just like a color surface with numSamples bits per element and - // numPlanes samples. - // - addr = ComputeSurfaceAddrFromCoordMicroTiled(x, - y, - slice, - plane, // sample - effectiveBpp, - pitch, - height, - effectiveSamples, - tileMode, - ADDR_NON_DISPLAYABLE, - FALSE, - pBitPosition); - - // - // Compute the real bit position. Each (sample, plane) is stored with one bit per sample. - // - - // - // Compute the pixel index with in the micro tile - // - UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x % 8, - y % 8, - slice, - 1, - tileMode, - ADDR_NON_DISPLAYABLE); - - *pBitPosition = ((pixelIndex * numSamples) + sample) & (BITS_PER_BYTE-1); - - UINT_64 bitAddr = BYTES_TO_BITS(addr) + *pBitPosition; - - addr = bitAddr / 8; - } - else - { - effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples); - effectiveSamples = 1; - - // - // Compute the address just like a color surface with numSamples bits per element and - // numPlanes samples. - // - addr = ComputeSurfaceAddrFromCoordMicroTiled(x, - y, - slice, - sample, - effectiveBpp, - pitch, - height, - effectiveSamples, - tileMode, - ADDR_NON_DISPLAYABLE, - TRUE, - pBitPosition); - } - - return addr; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeFmaskAddrFromCoordMacroTiled -* -* @brief -* Computes the FMASK address and bit position from a coordinate for 2D tilied (macro -* tiled) -* @return -* The byte address -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::ComputeFmaskAddrFromCoordMacroTiled( - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - UINT_32 sample, ///< [in] sample number - UINT_32 plane, ///< [in] plane number - UINT_32 pitch, ///< [in] surface pitch in pixels - UINT_32 height, ///< [in] surface height in pixels - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - UINT_32 bankSwizzle, ///< [in] bank swizzle - BOOL_32 ignoreSE, ///< [in] TRUE if ignore shader engine - ADDR_TILEINFO* pTileInfo, ///< [in] bank structure.**All fields to be valid on entry** - BOOL_32 resolved, ///< [in] TRUE if this is for resolved fmask - UINT_32* pBitPosition ///< [out] pointer to returned bit position - ) const -{ - UINT_64 addr = 0; - UINT_32 effectiveBpp; - UINT_32 effectiveSamples; - - // - // 2xAA use the same layout as 4xAA - // - if (numSamples == 2) - { - numSamples = 4; - } - - // - // Compute the number of planes. - // - if (resolved == FALSE) - { - effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples); - effectiveBpp = numSamples; - - // - // Compute the address just like a color surface with numSamples bits per element and - // numPlanes samples. - // - addr = ComputeSurfaceAddrFromCoordMacroTiled(x, - y, - slice, - plane, // sample - effectiveBpp, - pitch, - height, - effectiveSamples, - tileMode, - ADDR_NON_DISPLAYABLE,// isdisp - ignoreSE,// ignore_shader - FALSE,// depth_sample_order - pipeSwizzle, - bankSwizzle, - pTileInfo, - pBitPosition); - - // - // Compute the real bit position. Each (sample, plane) is stored with one bit per sample. - // - - - // - // Compute the pixel index with in the micro tile - // - UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x , - y , - slice, - effectiveBpp, - tileMode, - ADDR_NON_DISPLAYABLE); - - *pBitPosition = ((pixelIndex * numSamples) + sample) & (BITS_PER_BYTE-1); - - UINT_64 bitAddr = BYTES_TO_BITS(addr) + *pBitPosition; - - addr = bitAddr / 8; - - } - else - { - effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples); - effectiveSamples = 1; - - // - // Compute the address just like a color surface with numSamples bits per element and - // numPlanes samples. - // - addr = ComputeSurfaceAddrFromCoordMacroTiled(x, - y, - slice, - sample, - effectiveBpp, - pitch, - height, - effectiveSamples, - tileMode, - ADDR_NON_DISPLAYABLE, - ignoreSE, - TRUE, - pipeSwizzle, - bankSwizzle, - pTileInfo, - pBitPosition); - } - - return addr; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeFmaskCoordFromAddrMicroTiled -* -* @brief -* Compute (x,y,slice,sample,plane) coordinates from fmask address -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID EgBasedLib::ComputeFmaskCoordFromAddrMicroTiled( - UINT_64 addr, ///< [in] byte address - UINT_32 bitPosition,///< [in] bit position - UINT_32 pitch, ///< [in] pitch in pixels - UINT_32 height, ///< [in] height in pixels - UINT_32 numSamples, ///< [in] number of samples (of color buffer) - AddrTileMode tileMode, ///< [in] tile mode - BOOL_32 resolved, ///< [in] TRUE if it is resolved fmask - UINT_32* pX, ///< [out] X coord - UINT_32* pY, ///< [out] Y coord - UINT_32* pSlice, ///< [out] slice index - UINT_32* pSample, ///< [out] sample index - UINT_32* pPlane ///< [out] plane index - ) const -{ - UINT_32 effectiveBpp; - UINT_32 effectiveSamples; - - // 2xAA use the same layout as 4xAA - if (numSamples == 2) - { - numSamples = 4; - } - - if (resolved == FALSE) - { - effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples); - effectiveBpp = numSamples; - - ComputeSurfaceCoordFromAddrMicroTiled(addr, - bitPosition, - effectiveBpp, - pitch, - height, - effectiveSamples, - tileMode, - 0, // tileBase - 0, // compBits - pX, - pY, - pSlice, - pPlane, - ADDR_NON_DISPLAYABLE, // microTileType - FALSE // isDepthSampleOrder - ); - - - if ( pSample ) - { - *pSample = bitPosition % numSamples; - } - } - else - { - effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples); - effectiveSamples = 1; - - ComputeSurfaceCoordFromAddrMicroTiled(addr, - bitPosition, - effectiveBpp, - pitch, - height, - effectiveSamples, - tileMode, - 0, // tileBase - 0, // compBits - pX, - pY, - pSlice, - pSample, - ADDR_NON_DISPLAYABLE, // microTileType - TRUE // isDepthSampleOrder - ); - } -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeFmaskCoordFromAddrMacroTiled -* -* @brief -* Compute (x,y,slice,sample,plane) coordinates from -* fmask address -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID EgBasedLib::ComputeFmaskCoordFromAddrMacroTiled( - UINT_64 addr, ///< [in] byte address - UINT_32 bitPosition,///< [in] bit position - UINT_32 pitch, ///< [in] pitch in pixels - UINT_32 height, ///< [in] height in pixels - UINT_32 numSamples, ///< [in] number of samples (of color buffer) - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 pipeSwizzle,///< [in] pipe swizzle - UINT_32 bankSwizzle,///< [in] bank swizzle - BOOL_32 ignoreSE, ///< [in] TRUE if ignore shader engine - ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. **All fields to be valid on entry** - BOOL_32 resolved, ///< [in] TRUE if it is resolved fmask - UINT_32* pX, ///< [out] X coord - UINT_32* pY, ///< [out] Y coord - UINT_32* pSlice, ///< [out] slice index - UINT_32* pSample, ///< [out] sample index - UINT_32* pPlane ///< [out] plane index - ) const -{ - UINT_32 effectiveBpp; - UINT_32 effectiveSamples; - - // 2xAA use the same layout as 4xAA - if (numSamples == 2) - { - numSamples = 4; - } - - // - // Compute the number of planes. - // - if (resolved == FALSE) - { - effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples); - effectiveBpp = numSamples; - - ComputeSurfaceCoordFromAddrMacroTiled(addr, - bitPosition, - effectiveBpp, - pitch, - height, - effectiveSamples, - tileMode, - 0, // No tileBase - 0, // No compBits - ADDR_NON_DISPLAYABLE, - ignoreSE, - FALSE, - pipeSwizzle, - bankSwizzle, - pTileInfo, - pX, - pY, - pSlice, - pPlane); - - if (pSample) - { - *pSample = bitPosition % numSamples; - } - } - else - { - effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples); - effectiveSamples = 1; - - ComputeSurfaceCoordFromAddrMacroTiled(addr, - bitPosition, - effectiveBpp, - pitch, - height, - effectiveSamples, - tileMode, - 0, // No tileBase - 0, // No compBits - ADDR_NON_DISPLAYABLE, - ignoreSE, - TRUE, - pipeSwizzle, - bankSwizzle, - pTileInfo, - pX, - pY, - pSlice, - pSample); - } -} - -/** -**************************************************************************************************** -* EgBasedLib::DispatchComputeFmaskCoordFromAddr -* -* @brief -* Compute (x,y,slice,sample,plane) coordinates from -* fmask address -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID EgBasedLib::DispatchComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - UINT_64 addr = pIn->addr; - UINT_32 bitPosition = pIn->bitPosition; - UINT_32 pitch = pIn->pitch; - UINT_32 height = pIn->height; - UINT_32 numSamples = pIn->numSamples; - AddrTileMode tileMode = pIn->tileMode; - BOOL_32 ignoreSE = pIn->ignoreSE; - ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; - BOOL_32 resolved = pIn->resolved; - - UINT_32* pX = &pOut->x; - UINT_32* pY = &pOut->y; - UINT_32* pSlice = &pOut->slice; - UINT_32* pSample = &pOut->sample; - UINT_32* pPlane = &pOut->plane; - - switch (tileMode) - { - case ADDR_TM_1D_TILED_THIN1: - ComputeFmaskCoordFromAddrMicroTiled(addr, - bitPosition, - pitch, - height, - numSamples, - tileMode, - resolved, - pX, - pY, - pSlice, - pSample, - pPlane); - break; - case ADDR_TM_2D_TILED_THIN1://fall through - case ADDR_TM_3D_TILED_THIN1: - UINT_32 pipeSwizzle; - UINT_32 bankSwizzle; - - if (m_configFlags.useCombinedSwizzle) - { - ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo, - &bankSwizzle, &pipeSwizzle); - } - else - { - pipeSwizzle = pIn->pipeSwizzle; - bankSwizzle = pIn->bankSwizzle; - } - - ComputeFmaskCoordFromAddrMacroTiled(addr, - bitPosition, - pitch, - height, - numSamples, - tileMode, - pipeSwizzle, - bankSwizzle, - ignoreSE, - pTileInfo, - resolved, - pX, - pY, - pSlice, - pSample, - pPlane); - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - - } -} -#endif - /** **************************************************************************************************** * EgBasedLib::ComputeFmaskNumPlanesFromNumSamples diff --git a/src/amd/addrlib/r800/siaddrlib.cpp b/src/amd/addrlib/r800/siaddrlib.cpp index 9ee1335b3ae..0fb5c2befdc 100644 --- a/src/amd/addrlib/r800/siaddrlib.cpp +++ b/src/amd/addrlib/r800/siaddrlib.cpp @@ -32,16 +32,9 @@ */ #include "siaddrlib.h" - #include "si_gb_reg.h" -#include "si_ci_vi_merged_enum.h" - -#if BRAHMA_BUILD -#include "amdgpu_id.h" -#else -#include "si_id.h" -#endif +#include "amdgpu_asic_addr.h" //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -2664,7 +2657,8 @@ ADDR_E_RETURNCODE SiLib::HwlComputeSurfaceInfo( if ((pIn->numSlices > 1) && (IsMacroTiled(pOut->tileMode) == TRUE) && - (m_chipFamily == ADDR_CHIP_FAMILY_SI)) + ((m_chipFamily == ADDR_CHIP_FAMILY_SI) || + (IsPrtTileMode(pOut->tileMode) == FALSE))) { pOut->equationIndex = ADDR_INVALID_EQUATION_INDEX; } @@ -2822,8 +2816,8 @@ VOID SiLib::HwlCheckLastMacroTiledLvl( **************************************************************************************************** */ AddrTileMode SiLib::HwlDegradeThickTileMode( - AddrTileMode baseTileMode, ///< [in] base tile mode - UINT_32 numSlices, ///< [in] current number of slices + AddrTileMode baseTileMode, ///< base tile mode + UINT_32 numSlices, ///< current number of slices UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice ) const { @@ -2963,9 +2957,9 @@ INT_32 SiLib::HwlPostCheckTileIndex( **************************************************************************************************** */ ADDR_E_RETURNCODE SiLib::HwlSetupTileCfg( - UINT_32 bpp, ///< [in] Bits per pixel - INT_32 index, ///< [in] Tile index - INT_32 macroModeIndex, ///< [in] Index in macro tile mode table(CI) + UINT_32 bpp, ///< Bits per pixel + INT_32 index, ///< Tile index + INT_32 macroModeIndex, ///< Index in macro tile mode table(CI) ADDR_TILEINFO* pInfo, ///< [out] Tile Info AddrTileMode* pMode, ///< [out] Tile mode AddrTileType* pType ///< [out] Tile type diff --git a/src/amd/addrlib/r800/siaddrlib.h b/src/amd/addrlib/r800/siaddrlib.h index faf63fde6c5..f07fc31a57d 100644 --- a/src/amd/addrlib/r800/siaddrlib.h +++ b/src/amd/addrlib/r800/siaddrlib.h @@ -59,18 +59,36 @@ struct TileConfig * @brief SI specific settings structure. **************************************************************************************************** */ -struct SIChipSettings +struct SiChipSettings { - struct - { - UINT_32 isSouthernIsland : 1; - UINT_32 isTahiti : 1; - UINT_32 isPitCairn : 1; - UINT_32 isCapeVerde : 1; - /// Oland/Hainan are of GFXIP 6.0, similar with SI - UINT_32 isOland : 1; - UINT_32 isHainan : 1; - }; + UINT_32 isSouthernIsland : 1; + UINT_32 isTahiti : 1; + UINT_32 isPitCairn : 1; + UINT_32 isCapeVerde : 1; + // Oland/Hainan are of GFXIP 6.0, similar with SI + UINT_32 isOland : 1; + UINT_32 isHainan : 1; + + // CI + UINT_32 isSeaIsland : 1; + UINT_32 isBonaire : 1; + UINT_32 isKaveri : 1; + UINT_32 isSpectre : 1; + UINT_32 isSpooky : 1; + UINT_32 isKalindi : 1; + // Hawaii is GFXIP 7.2 + UINT_32 isHawaii : 1; + + // VI + UINT_32 isVolcanicIslands : 1; + UINT_32 isIceland : 1; + UINT_32 isTonga : 1; + UINT_32 isFiji : 1; + UINT_32 isPolaris10 : 1; + UINT_32 isPolaris11 : 1; + UINT_32 isPolaris12 : 1; + // VI fusion + UINT_32 isCarrizo : 1; }; /** @@ -312,12 +330,12 @@ protected: UINT_32 m_uncompressDepthEqIndex; + SiChipSettings m_settings; + private: VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const; BOOL_32 InitTileSettingTable(const UINT_32 *pSetting, UINT_32 noOfEntries); - - SIChipSettings m_settings; }; } // V1 |