diff options
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/memory/StoreTile.h')
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/memory/StoreTile.h | 529 |
1 files changed, 0 insertions, 529 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h index 29717261e7f..407cefae54e 100644 --- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h +++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h @@ -104,7 +104,6 @@ struct StorePixels<8, 2> } }; -#if USE_8x2_TILE_BACKEND template <> struct StorePixels<8, 4> { @@ -130,7 +129,6 @@ struct StorePixels<8, 4> } }; -#endif ////////////////////////////////////////////////////////////////////////// /// StorePixels (32-bit pixel specialization) /// @brief Stores a 4x2 (AVX) raster-tile to two rows. @@ -159,7 +157,6 @@ struct StorePixels<16, 2> } }; -#if USE_8x2_TILE_BACKEND template <> struct StorePixels<16, 4> { @@ -185,7 +182,6 @@ struct StorePixels<16, 4> } }; -#endif ////////////////////////////////////////////////////////////////////////// /// StorePixels (32-bit pixel specialization) /// @brief Stores a 4x2 (AVX) raster-tile to two rows. @@ -213,7 +209,6 @@ struct StorePixels<32, 2> } }; -#if USE_8x2_TILE_BACKEND template <> struct StorePixels<32, 4> { @@ -237,7 +232,6 @@ struct StorePixels<32, 4> } }; -#endif ////////////////////////////////////////////////////////////////////////// /// StorePixels (32-bit pixel specialization) /// @brief Stores a 4x2 (AVX) raster-tile to two rows. @@ -264,7 +258,6 @@ struct StorePixels<64, 4> } }; -#if USE_8x2_TILE_BACKEND template <> struct StorePixels<64, 8> { @@ -287,7 +280,6 @@ struct StorePixels<64, 8> } }; -#endif ////////////////////////////////////////////////////////////////////////// /// StorePixels (32-bit pixel specialization) /// @brief Stores a 4x2 (AVX) raster-tile to two rows. @@ -318,7 +310,6 @@ struct StorePixels<128, 8> } }; -#if USE_8x2_TILE_BACKEND template <> struct StorePixels<128, 16> { @@ -339,7 +330,6 @@ struct StorePixels<128, 16> } }; -#endif ////////////////////////////////////////////////////////////////////////// /// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2) ////////////////////////////////////////////////////////////////////////// @@ -354,7 +344,6 @@ struct ConvertPixelsSOAtoAOS template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND static const uint32_t MAX_RASTER_TILE_BYTES = 16 * 16; // 16 pixels * 16 bytes per pixel OSALIGNSIMD16(uint8_t) soaTile[MAX_RASTER_TILE_BYTES]; @@ -368,21 +357,6 @@ struct ConvertPixelsSOAtoAOS // Convert from SOA --> AOS FormatTraits<DstFormat>::TransposeT::Transpose_16(soaTile, aosTile); -#else - static const uint32_t MAX_RASTER_TILE_BYTES = 128; // 8 pixels * 16 bytes per pixel - - OSALIGNSIMD(uint8_t) soaTile[MAX_RASTER_TILE_BYTES]; - OSALIGNSIMD(uint8_t) aosTile[MAX_RASTER_TILE_BYTES]; - - // Convert from SrcFormat --> DstFormat - simdvector src; - LoadSOA<SrcFormat>(pSrc, src); - StoreSOA<DstFormat>(src, soaTile); - - // Convert from SOA --> AOS - FormatTraits<DstFormat>::TransposeT::Transpose(soaTile, aosTile); - -#endif // Store data into destination StorePixels<FormatTraits<DstFormat>::bpp, NumDests>::Store(aosTile, ppDsts); } @@ -403,7 +377,6 @@ struct ConvertPixelsSOAtoAOS<Format, Format> template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND static const uint32_t MAX_RASTER_TILE_BYTES = 16 * 16; // 16 pixels * 16 bytes per pixel OSALIGNSIMD16(uint8_t) aosTile[MAX_RASTER_TILE_BYTES]; @@ -411,15 +384,6 @@ struct ConvertPixelsSOAtoAOS<Format, Format> // Convert from SOA --> AOS FormatTraits<Format>::TransposeT::Transpose_16(pSrc, aosTile); -#else - static const uint32_t MAX_RASTER_TILE_BYTES = 128; // 8 pixels * 16 bytes per pixel - - OSALIGNSIMD(uint8_t) aosTile[MAX_RASTER_TILE_BYTES]; - - // Convert from SOA --> AOS - FormatTraits<Format>::TransposeT::Transpose(pSrc, aosTile); - -#endif // Store data into destination StorePixels<FormatTraits<Format>::bpp, NumDests>::Store(aosTile, ppDsts); } @@ -439,7 +403,6 @@ struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, B5G6R5_UNORM > template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND static const SWR_FORMAT SrcFormat = R32G32B32A32_FLOAT; static const SWR_FORMAT DstFormat = B5G6R5_UNORM; @@ -483,47 +446,6 @@ struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, B5G6R5_UNORM > *pAosTile++ = *pPacked++; } -#else - static const SWR_FORMAT SrcFormat = R32G32B32A32_FLOAT; - static const SWR_FORMAT DstFormat = B5G6R5_UNORM; - static const uint32_t MAX_RASTER_TILE_BYTES = 128; // 8 pixels * 16 bytes per pixel - - OSALIGNSIMD(uint8_t) aosTile[MAX_RASTER_TILE_BYTES]; - - // Load hot-tile - simdvector src, dst; - LoadSOA<SrcFormat>(pSrc, src); - - // deswizzle - dst.x = src[FormatTraits<DstFormat>::swizzle(0)]; - dst.y = src[FormatTraits<DstFormat>::swizzle(1)]; - dst.z = src[FormatTraits<DstFormat>::swizzle(2)]; - - // clamp - dst.x = Clamp<DstFormat>(dst.x, 0); - dst.y = Clamp<DstFormat>(dst.y, 1); - dst.z = Clamp<DstFormat>(dst.z, 2); - - // normalize - dst.x = Normalize<DstFormat>(dst.x, 0); - dst.y = Normalize<DstFormat>(dst.y, 1); - dst.z = Normalize<DstFormat>(dst.z, 2); - - // pack - simdscalari packed = _simd_castps_si(dst.x); - packed = _simd_or_si(packed, _simd_slli_epi32(_simd_castps_si(dst.y), FormatTraits<DstFormat>::GetConstBPC(0))); - packed = _simd_or_si(packed, _simd_slli_epi32(_simd_castps_si(dst.z), FormatTraits<DstFormat>::GetConstBPC(0) + - FormatTraits<DstFormat>::GetConstBPC(1))); - - // pack low 16 bits of each 32 bit lane to low 128 bits of dst - uint32_t *pPacked = (uint32_t*)&packed; - uint16_t *pAosTile = (uint16_t*)&aosTile[0]; - for (uint32_t t = 0; t < KNOB_SIMD_WIDTH; ++t) - { - *pAosTile++ = *pPacked++; - } - -#endif // Store data into destination StorePixels<FormatTraits<DstFormat>::bpp, NumDests>::Store(aosTile, ppDsts); } @@ -546,7 +468,6 @@ struct ConvertPixelsSOAtoAOS<R32_FLOAT, R24_UNORM_X8_TYPELESS> template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND simd16scalar comp = _simd16_load_ps(reinterpret_cast<const float *>(pSrc)); // clamp @@ -579,46 +500,9 @@ struct ConvertPixelsSOAtoAOS<R32_FLOAT, R24_UNORM_X8_TYPELESS> _simd_storeu2_si(reinterpret_cast<simd4scalari *>(ppDsts[1]), reinterpret_cast<simd4scalari *>(ppDsts[0]), _simd16_extract_si(dest, 0)); _simd_storeu2_si(reinterpret_cast<simd4scalari *>(ppDsts[3]), reinterpret_cast<simd4scalari *>(ppDsts[2]), _simd16_extract_si(dest, 1)); -#else - static const uint32_t MAX_RASTER_TILE_BYTES = 128; // 8 pixels * 16 bytes per pixel - - OSALIGNSIMD(uint8_t) soaTile[MAX_RASTER_TILE_BYTES]; - OSALIGNSIMD(uint8_t) aosTile[MAX_RASTER_TILE_BYTES]; - - // Convert from SrcFormat --> DstFormat - simdvector src; - LoadSOA<SrcFormat>(pSrc, src); - StoreSOA<DstFormat>(src, soaTile); - - // Convert from SOA --> AOS - FormatTraits<DstFormat>::TransposeT::Transpose(soaTile, aosTile); - - // Store data into destination but don't overwrite the X8 bits - // Each 4-pixel row is 16-bytes - simd4scalari *pZRow01 = (simd4scalari*)aosTile; - simd4scalari vQuad00 = SIMD128::load_si(pZRow01); - simd4scalari vQuad01 = SIMD128::load_si(pZRow01 + 1); - - simd4scalari vRow00 = SIMD128::unpacklo_epi64(vQuad00, vQuad01); - simd4scalari vRow10 = SIMD128::unpackhi_epi64(vQuad00, vQuad01); - - simd4scalari vDst0 = SIMD128::loadu_si((const simd4scalari*)ppDsts[0]); - simd4scalari vDst1 = SIMD128::loadu_si((const simd4scalari*)ppDsts[1]); - - simd4scalari vMask = _mm_set1_epi32(0xFFFFFF); - - vDst0 = SIMD128::andnot_si(vMask, vDst0); - vDst0 = SIMD128::or_si(vDst0, SIMD128::and_si(vRow00, vMask)); - vDst1 = SIMD128::andnot_si(vMask, vDst1); - vDst1 = SIMD128::or_si(vDst1, SIMD128::and_si(vRow10, vMask)); - - SIMD128::storeu_si((simd4scalari*)ppDsts[0], vDst0); - SIMD128::storeu_si((simd4scalari*)ppDsts[1], vDst1); -#endif } }; -#if USE_8x2_TILE_BACKEND template<SWR_FORMAT DstFormat> INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst0, uint8_t* pDst1, uint8_t* pDst2, uint8_t* pDst3) { @@ -689,7 +573,6 @@ INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst0, uint8_t* pDs _simd_storeu2_si(reinterpret_cast<simd4scalari *>(pDst3), reinterpret_cast<simd4scalari *>(pDst2), _simd16_extract_si(final, 1)); } -#endif template<SWR_FORMAT DstFormat> INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst1) { @@ -790,7 +673,6 @@ INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst _simd_storeu2_si((simd4scalari*)pDst1, (simd4scalari*)pDst, final); } -#if USE_8x2_TILE_BACKEND template<SWR_FORMAT DstFormat> INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst0, uint8_t* pDst1, uint8_t* pDst2, uint8_t* pDst3) { @@ -854,7 +736,6 @@ INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst0, uint8 _simd_storeu2_si(reinterpret_cast<simd4scalari *>(pDst3), reinterpret_cast<simd4scalari *>(pDst2), _simd16_extract_si(final, 1)); } -#endif template<SWR_FORMAT DstFormat> INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst1) { @@ -947,11 +828,7 @@ struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, B8G8R8A8_UNORM> template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND FlatConvert<B8G8R8A8_UNORM>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]); -#else - FlatConvert<B8G8R8A8_UNORM>(pSrc, ppDsts[0], ppDsts[1]); -#endif } }; @@ -961,11 +838,7 @@ struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, B8G8R8X8_UNORM> template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND FlatConvertNoAlpha<B8G8R8X8_UNORM>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]); -#else - FlatConvertNoAlpha<B8G8R8X8_UNORM>(pSrc, ppDsts[0], ppDsts[1]); -#endif } }; @@ -975,11 +848,7 @@ struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, B8G8R8A8_UNORM_SRGB > template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND FlatConvert<B8G8R8A8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]); -#else - FlatConvert<B8G8R8A8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1]); -#endif } }; @@ -989,11 +858,7 @@ struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, B8G8R8X8_UNORM_SRGB > template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND FlatConvertNoAlpha<B8G8R8X8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]); -#else - FlatConvertNoAlpha<B8G8R8X8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1]); -#endif } }; @@ -1003,11 +868,7 @@ struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, R8G8B8A8_UNORM > template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND FlatConvert<R8G8B8A8_UNORM>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]); -#else - FlatConvert<R8G8B8A8_UNORM>(pSrc, ppDsts[0], ppDsts[1]); -#endif } }; @@ -1017,11 +878,7 @@ struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, R8G8B8X8_UNORM > template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND FlatConvertNoAlpha<R8G8B8X8_UNORM>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]); -#else - FlatConvertNoAlpha<R8G8B8X8_UNORM>(pSrc, ppDsts[0], ppDsts[1]); -#endif } }; @@ -1031,11 +888,7 @@ struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, R8G8B8A8_UNORM_SRGB > template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND FlatConvert<R8G8B8A8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]); -#else - FlatConvert<R8G8B8A8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1]); -#endif } }; @@ -1045,11 +898,7 @@ struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, R8G8B8X8_UNORM_SRGB > template <size_t NumDests> INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) { -#if USE_8x2_TILE_BACKEND FlatConvertNoAlpha<R8G8B8X8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]); -#else - FlatConvertNoAlpha<R8G8B8X8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1]); -#endif } }; @@ -1069,7 +918,6 @@ struct StoreRasterTile uint32_t x, uint32_t y, float outputColor[4]) { -#if USE_8x2_TILE_BACKEND typedef SimdTile_16<SrcFormat, DstFormat> SimdT; SimdT *pSrcSimdTiles = reinterpret_cast<SimdT *>(pSrc); @@ -1083,21 +931,6 @@ struct StoreRasterTile uint32_t simdOffset = (y % SIMD16_TILE_Y_DIM) * SIMD16_TILE_X_DIM + (x % SIMD16_TILE_X_DIM); pSimdTile->GetSwizzledColor(simdOffset, outputColor); -#else - typedef SimdTile<SrcFormat, DstFormat> SimdT; - - SimdT* pSrcSimdTiles = (SimdT*)pSrc; - - // Compute which simd tile we're accessing within 8x8 tile. - // i.e. Compute linear simd tile coordinate given (x, y) in pixel coordinates. - uint32_t simdIndex = (y / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM) + (x / SIMD_TILE_X_DIM); - - SimdT* pSimdTile = &pSrcSimdTiles[simdIndex]; - - uint32_t simdOffset = (y % SIMD_TILE_Y_DIM) * SIMD_TILE_X_DIM + (x % SIMD_TILE_X_DIM); - - pSimdTile->GetSwizzledColor(simdOffset, outputColor); -#endif } ////////////////////////////////////////////////////////////////////////// @@ -1230,7 +1063,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 8>, SrcFormat, DstFormat> uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); -#if USE_8x2_TILE_BACKEND const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL; const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL; @@ -1262,27 +1094,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 8>, SrcFormat, DstFormat> ppDsts[2] += dy; ppDsts[3] += dy; } -#else - uint8_t* ppRows[] = { pDst, pDst + pDstSurface->pitch }; - - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM; ++row) - { - uint8_t* ppStartRows[] = { ppRows[0], ppRows[1] }; - - for (uint32_t col = 0; col < KNOB_TILE_X_DIM / SIMD_TILE_X_DIM; ++col) - { - // Format conversion and convert from SOA to AOS, and store the rows. - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppRows); - - ppRows[0] += KNOB_SIMD_WIDTH * DST_BYTES_PER_PIXEL / 2; - ppRows[1] += KNOB_SIMD_WIDTH * DST_BYTES_PER_PIXEL / 2; - pSrc += SRC_BYTES_PER_PIXEL * KNOB_SIMD_WIDTH; - } - - ppRows[0] = ppStartRows[0] + 2 * pDstSurface->pitch; - ppRows[1] = ppStartRows[1] + 2 * pDstSurface->pitch; - } -#endif } }; @@ -1317,7 +1128,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 16>, SrcFormat, DstFormat uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); -#if USE_8x2_TILE_BACKEND const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL; const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL; @@ -1349,27 +1159,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 16>, SrcFormat, DstFormat ppDsts[2] += dy; ppDsts[3] += dy; } -#else - uint8_t* ppRows[] = { pDst, pDst + pDstSurface->pitch }; - - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM; ++row) - { - uint8_t* ppStartRows[] = { ppRows[0], ppRows[1] }; - - for (uint32_t col = 0; col < KNOB_TILE_X_DIM / SIMD_TILE_X_DIM; ++col) - { - // Format conversion and convert from SOA to AOS, and store the rows. - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppRows); - - ppRows[0] += KNOB_SIMD_WIDTH * DST_BYTES_PER_PIXEL / 2; - ppRows[1] += KNOB_SIMD_WIDTH * DST_BYTES_PER_PIXEL / 2; - pSrc += SRC_BYTES_PER_PIXEL * KNOB_SIMD_WIDTH; - } - - ppRows[0] = ppStartRows[0] + 2 * pDstSurface->pitch; - ppRows[1] = ppStartRows[1] + 2 * pDstSurface->pitch; - } -#endif } }; @@ -1404,7 +1193,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 32>, SrcFormat, DstFormat uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); -#if USE_8x2_TILE_BACKEND const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL; const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL; @@ -1436,27 +1224,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 32>, SrcFormat, DstFormat ppDsts[2] += dy; ppDsts[3] += dy; } -#else - uint8_t* ppRows[] = { pDst, pDst + pDstSurface->pitch }; - - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM; ++row) - { - uint8_t* ppStartRows[] = { ppRows[0], ppRows[1] }; - - for (uint32_t col = 0; col < KNOB_TILE_X_DIM / SIMD_TILE_X_DIM; ++col) - { - // Format conversion and convert from SOA to AOS, and store the rows. - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppRows); - - ppRows[0] += KNOB_SIMD_WIDTH * DST_BYTES_PER_PIXEL / 2; - ppRows[1] += KNOB_SIMD_WIDTH * DST_BYTES_PER_PIXEL / 2; - pSrc += SRC_BYTES_PER_PIXEL * KNOB_SIMD_WIDTH; - } - - ppRows[0] = ppStartRows[0] + 2 * pDstSurface->pitch; - ppRows[1] = ppStartRows[1] + 2 * pDstSurface->pitch; - } -#endif } }; @@ -1470,10 +1237,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 64>, SrcFormat, DstFormat static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8; static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8; static const size_t MAX_DST_COLUMN_BYTES = 16; -#if !USE_8x2_TILE_BACKEND - static const size_t SRC_COLUMN_BYTES = KNOB_SIMD_WIDTH * SRC_BYTES_PER_PIXEL; - static const size_t DST_COLUMN_BYTES_PER_SRC = KNOB_SIMD_WIDTH * DST_BYTES_PER_PIXEL / 2; -#endif ////////////////////////////////////////////////////////////////////////// /// @brief Stores an 8x8 raster tile to the destination surface. @@ -1496,7 +1259,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 64>, SrcFormat, DstFormat uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); -#if USE_8x2_TILE_BACKEND const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL; const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch; @@ -1530,43 +1292,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 64>, SrcFormat, DstFormat ppDsts[i] += dy; } } -#else - uint8_t* ppDsts[] = - { - pDst, // row 0, col 0 - pDst + pDstSurface->pitch, // row 1, col 0 - pDst + MAX_DST_COLUMN_BYTES, // row 0, col 1 - pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES, // row 1, col 1 - }; - - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM; ++row) - { - uint8_t* ppStartRows[] = - { - ppDsts[0], - ppDsts[1], - ppDsts[2], - ppDsts[3], - }; - - for (uint32_t col = 0; col < KNOB_TILE_X_DIM / SIMD_TILE_X_DIM; ++col) - { - // Format conversion and convert from SOA to AOS, and store the rows. - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts); - - ppDsts[0] += DST_COLUMN_BYTES_PER_SRC; - ppDsts[1] += DST_COLUMN_BYTES_PER_SRC; - ppDsts[2] += DST_COLUMN_BYTES_PER_SRC; - ppDsts[3] += DST_COLUMN_BYTES_PER_SRC; - pSrc += SRC_COLUMN_BYTES; - } - - ppDsts[0] = ppStartRows[0] + 2 * pDstSurface->pitch; - ppDsts[1] = ppStartRows[1] + 2 * pDstSurface->pitch; - ppDsts[2] = ppStartRows[2] + 2 * pDstSurface->pitch; - ppDsts[3] = ppStartRows[3] + 2 * pDstSurface->pitch; - } -#endif } }; @@ -1580,10 +1305,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 128>, SrcFormat, DstForma static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8; static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8; static const size_t MAX_DST_COLUMN_BYTES = 16; -#if !USE_8x2_TILE_BACKEND - static const size_t SRC_COLUMN_BYTES = KNOB_SIMD_WIDTH * SRC_BYTES_PER_PIXEL; - static const size_t DST_COLUMN_BYTES_PER_SRC = KNOB_SIMD_WIDTH * DST_BYTES_PER_PIXEL / 2; -#endif ////////////////////////////////////////////////////////////////////////// /// @brief Stores an 8x8 raster tile to the destination surface. @@ -1606,7 +1327,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 128>, SrcFormat, DstForma uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); -#if USE_8x2_TILE_BACKEND const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL; const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch; @@ -1648,51 +1368,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 128>, SrcFormat, DstForma ppDsts[i] += dy; } } -#else - struct DstPtrs - { - uint8_t* ppDsts[8]; - } ptrs; - - // Need 8 pointers, 4 columns of 2 rows each - for (uint32_t y = 0; y < 2; ++y) - { - for (uint32_t x = 0; x < 4; ++x) - { - ptrs.ppDsts[x * 2 + y] = pDst + y * pDstSurface->pitch + x * MAX_DST_COLUMN_BYTES; - } - } - - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM; ++row) - { - DstPtrs startPtrs = ptrs; - - for (uint32_t col = 0; col < KNOB_TILE_X_DIM / SIMD_TILE_X_DIM; ++col) - { - // Format conversion and convert from SOA to AOS, and store the rows. - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ptrs.ppDsts); - - ptrs.ppDsts[0] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[1] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[2] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[3] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[4] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[5] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[6] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[7] += DST_COLUMN_BYTES_PER_SRC; - pSrc += SRC_COLUMN_BYTES; - } - - ptrs.ppDsts[0] = startPtrs.ppDsts[0] + 2 * pDstSurface->pitch; - ptrs.ppDsts[1] = startPtrs.ppDsts[1] + 2 * pDstSurface->pitch; - ptrs.ppDsts[2] = startPtrs.ppDsts[2] + 2 * pDstSurface->pitch; - ptrs.ppDsts[3] = startPtrs.ppDsts[3] + 2 * pDstSurface->pitch; - ptrs.ppDsts[4] = startPtrs.ppDsts[4] + 2 * pDstSurface->pitch; - ptrs.ppDsts[5] = startPtrs.ppDsts[5] + 2 * pDstSurface->pitch; - ptrs.ppDsts[6] = startPtrs.ppDsts[6] + 2 * pDstSurface->pitch; - ptrs.ppDsts[7] = startPtrs.ppDsts[7] + 2 * pDstSurface->pitch; - } -#endif } }; @@ -1728,7 +1403,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 8>, SrcFormat, Dst // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows. // We can compute the offsets to each column within the raster tile once and increment from these. -#if USE_8x2_TILE_BACKEND // There will be 4 8x2 simd tiles in an 8x8 raster tile. uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); @@ -1758,32 +1432,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 8>, SrcFormat, Dst ppDsts[2] += dy; ppDsts[3] += dy; } -#else - // There will be 8 4x2 simd tiles in an 8x8 raster tile. - uint8_t* pCol0 = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, - pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); - - // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE. - uint32_t pSrcInc = (FormatTraits<SrcFormat>::bpp * KNOB_SIMD_WIDTH) / 8; - - // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern. - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM; row += SIMD_TILE_Y_DIM) - { - uint32_t rowOffset = row * DestRowWidthBytes; - - uint8_t* pRow = pCol0 + rowOffset; - uint8_t* ppDsts[] = { pRow, pRow + DestRowWidthBytes }; - - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts); - pSrc += pSrcInc; - - ppDsts[0] += DestRowWidthBytes / 4; - ppDsts[1] += DestRowWidthBytes / 4; - - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts); - pSrc += pSrcInc; - } -#endif } }; @@ -1819,7 +1467,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 16>, SrcFormat, Ds // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows. // We can compute the offsets to each column within the raster tile once and increment from these. -#if USE_8x2_TILE_BACKEND // There will be 4 8x2 simd tiles in an 8x8 raster tile. uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); @@ -1849,32 +1496,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 16>, SrcFormat, Ds ppDsts[2] += dy; ppDsts[3] += dy; } -#else - // There will be 8 4x2 simd tiles in an 8x8 raster tile. - uint8_t* pCol0 = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, - pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); - - // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE. - uint32_t pSrcInc = (FormatTraits<SrcFormat>::bpp * KNOB_SIMD_WIDTH) / 8; - - // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern. - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM; row += SIMD_TILE_Y_DIM) - { - uint32_t rowOffset = row * DestRowWidthBytes; - - uint8_t* pRow = pCol0 + rowOffset; - uint8_t* ppDsts[] = { pRow, pRow + DestRowWidthBytes }; - - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts); - pSrc += pSrcInc; - - ppDsts[0] += DestRowWidthBytes / 2; - ppDsts[1] += DestRowWidthBytes / 2; - - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts); - pSrc += pSrcInc; - } -#endif } }; @@ -1911,7 +1532,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_XMAJOR, 32>, SrcFormat, Ds // TileX is a row-major tiling mode where each 4KB tile consist of 8 x 512B rows. // We can compute the offsets to each column within the raster tile once and increment from these. -#if USE_8x2_TILE_BACKEND uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); @@ -1945,28 +1565,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_XMAJOR, 32>, SrcFormat, Ds ppDsts[2] += dy; ppDsts[3] += dy; } -#else - uint8_t *pRow0 = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, - pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); - uint8_t* pRow1 = pRow0 + DestRowWidthBytes; - - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM; row += SIMD_TILE_Y_DIM) - { - for (uint32_t col = 0; col < KNOB_TILE_X_DIM; col += SIMD_TILE_X_DIM) - { - uint32_t xRowOffset = col * (FormatTraits<DstFormat>::bpp / 8); - - uint8_t* ppDsts[] = { pRow0 + xRowOffset, pRow1 + xRowOffset }; - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts); - - // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE. - pSrc += (FormatTraits<SrcFormat>::bpp * KNOB_SIMD_WIDTH) / 8; - } - - pRow0 += (DestRowWidthBytes * 2); - pRow1 += (DestRowWidthBytes * 2); - } -#endif } }; @@ -2003,7 +1601,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 32>, SrcFormat, Ds // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows. // We can compute the offsets to each column within the raster tile once and increment from these. -#if USE_8x2_TILE_BACKEND // There will be 4 8x2 simd tiles in an 8x8 raster tile. uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); @@ -2034,32 +1631,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 32>, SrcFormat, Ds ppDsts[2] += dy; ppDsts[3] += dy; } -#else - // There will be 8 4x2 simd tiles in an 8x8 raster tile. - uint8_t* pCol0 = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, - pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); - - // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE. - uint32_t pSrcInc = (FormatTraits<SrcFormat>::bpp * KNOB_SIMD_WIDTH) / 8; - - // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern. - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM; row += SIMD_TILE_Y_DIM) - { - uint32_t rowOffset = row * DestRowWidthBytes; - - uint8_t* pRow = pCol0 + rowOffset; - uint8_t* ppDsts[] = { pRow, pRow + DestRowWidthBytes }; - - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts); - pSrc += pSrcInc; - - ppDsts[0] += DestColumnBytes; - ppDsts[1] += DestColumnBytes; - - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts); - pSrc += pSrcInc; - } -#endif } }; @@ -2096,7 +1667,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 64>, SrcFormat, Ds // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows. // We can compute the offsets to each column within the raster tile once and increment from these. -#if USE_8x2_TILE_BACKEND // There will be 4 8x2 simd tiles in an 8x8 raster tile. uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); @@ -2131,40 +1701,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 64>, SrcFormat, Ds ppDsts[i] += dy; } } -#else - // There will be 8 4x2 simd tiles in an 8x8 raster tile. - uint8_t* pCol0 = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, - pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); - uint8_t* pCol1 = pCol0 + DestColumnBytes; - - // There are 4 columns, each 2 pixels wide when we have 64bpp pixels. - // Increment by a whole SIMD. 4x2 for AVX. 2x2 for SSE. - uint32_t pSrcInc = (FormatTraits<SrcFormat>::bpp * KNOB_SIMD_WIDTH) / 8; - - // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern. - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM; row += SIMD_TILE_Y_DIM) - { - uint32_t rowOffset = row * DestRowWidthBytes; - uint8_t* ppDsts[] = - { - pCol0 + rowOffset, - pCol0 + rowOffset + DestRowWidthBytes, - pCol1 + rowOffset, - pCol1 + rowOffset + DestRowWidthBytes, - }; - - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts); - pSrc += pSrcInc; - - ppDsts[0] += DestColumnBytes * 2; - ppDsts[1] += DestColumnBytes * 2; - ppDsts[2] += DestColumnBytes * 2; - ppDsts[3] += DestColumnBytes * 2; - - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts); - pSrc += pSrcInc; - } -#endif } }; @@ -2175,22 +1711,8 @@ template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat> struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 128>, SrcFormat, DstFormat> { typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 128>, SrcFormat, DstFormat> GenericStoreTile; -#if USE_8x2_TILE_BACKEND - static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8; - -#else - static const size_t TILE_Y_COL_WIDTH_BYTES = 16; - static const size_t TILE_Y_ROWS = 32; - static const size_t TILE_Y_COL_BYTES = TILE_Y_ROWS * TILE_Y_COL_WIDTH_BYTES; - - static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8; static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8; - static const size_t MAX_DST_COLUMN_BYTES = 16; - - static const size_t SRC_COLUMN_BYTES = KNOB_SIMD_WIDTH * SRC_BYTES_PER_PIXEL; - static const size_t DST_COLUMN_BYTES_PER_SRC = TILE_Y_COL_BYTES * 4; -#endif ////////////////////////////////////////////////////////////////////////// /// @brief Stores an 8x8 raster tile to the destination surface. /// @param pSrc - Pointer to raster tile. @@ -2201,10 +1723,8 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 128>, SrcFormat, D SWR_SURFACE_STATE* pDstSurface, uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex) { -#if USE_8x2_TILE_BACKEND static const uint32_t DestRowWidthBytes = 16; // 16B rows static const uint32_t DestColumnBytes = DestRowWidthBytes * 32; // 16B x 32 rows. -#endif // Punt non-full tiles to generic store uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U); @@ -2217,7 +1737,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 128>, SrcFormat, D // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows. // We can compute the offsets to each column within the raster tile once and increment from these. -#if USE_8x2_TILE_BACKEND // There will be 4 8x2 simd tiles in an 8x8 raster tile. uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); @@ -2260,54 +1779,6 @@ struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 128>, SrcFormat, D ppDsts[i] += dy; } } -#else - // There will be 8 4x2 simd tiles in an 8x8 raster tile. - uint8_t* pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex, - pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface); - struct DstPtrs - { - uint8_t* ppDsts[8]; - } ptrs; - - // Need 8 pointers, 4 columns of 2 rows each - for (uint32_t y = 0; y < 2; ++y) - { - for (uint32_t x = 0; x < 4; ++x) - { - ptrs.ppDsts[x * 2 + y] = pDst + y * TILE_Y_COL_WIDTH_BYTES + x * TILE_Y_COL_BYTES; - } - } - - for (uint32_t row = 0; row < KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM; ++row) - { - DstPtrs startPtrs = ptrs; - - for (uint32_t col = 0; col < KNOB_TILE_X_DIM / SIMD_TILE_X_DIM; ++col) - { - // Format conversion and convert from SOA to AOS, and store the rows. - ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ptrs.ppDsts); - - ptrs.ppDsts[0] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[1] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[2] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[3] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[4] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[5] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[6] += DST_COLUMN_BYTES_PER_SRC; - ptrs.ppDsts[7] += DST_COLUMN_BYTES_PER_SRC; - pSrc += SRC_COLUMN_BYTES; - } - - ptrs.ppDsts[0] = startPtrs.ppDsts[0] + 2 * TILE_Y_COL_WIDTH_BYTES; - ptrs.ppDsts[1] = startPtrs.ppDsts[1] + 2 * TILE_Y_COL_WIDTH_BYTES; - ptrs.ppDsts[2] = startPtrs.ppDsts[2] + 2 * TILE_Y_COL_WIDTH_BYTES; - ptrs.ppDsts[3] = startPtrs.ppDsts[3] + 2 * TILE_Y_COL_WIDTH_BYTES; - ptrs.ppDsts[4] = startPtrs.ppDsts[4] + 2 * TILE_Y_COL_WIDTH_BYTES; - ptrs.ppDsts[5] = startPtrs.ppDsts[5] + 2 * TILE_Y_COL_WIDTH_BYTES; - ptrs.ppDsts[6] = startPtrs.ppDsts[6] + 2 * TILE_Y_COL_WIDTH_BYTES; - ptrs.ppDsts[7] = startPtrs.ppDsts[7] + 2 * TILE_Y_COL_WIDTH_BYTES; - } -#endif } }; |