diff options
author | Tim Rowley <[email protected]> | 2016-06-10 11:31:16 -0600 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2016-06-23 10:50:41 -0500 |
commit | c7cd33b605f0238464a3250a11f7134e4b7d22a6 (patch) | |
tree | 6759d9e5be116763d34943271236e515eaf0700f | |
parent | c867c22d855163ecbf18d5606b27c9d2cb50a148 (diff) |
swr: [rasterizer core] conservative rasterization frontend support
Reviewed-by: Bruce Cherniak <[email protected]>
-rw-r--r-- | src/gallium/drivers/swr/Makefile.sources | 1 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.cpp | 13 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/clip.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/conservativeRast.h | 120 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/context.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 163 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.h | 43 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/rasterizer.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/state.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/utils.h | 30 |
10 files changed, 325 insertions, 63 deletions
diff --git a/src/gallium/drivers/swr/Makefile.sources b/src/gallium/drivers/swr/Makefile.sources index f9448eec6c6..8d97a75c68b 100644 --- a/src/gallium/drivers/swr/Makefile.sources +++ b/src/gallium/drivers/swr/Makefile.sources @@ -67,6 +67,7 @@ CORE_CXX_SOURCES := \ rasterizer/core/blend.h \ rasterizer/core/clip.cpp \ rasterizer/core/clip.h \ + rasterizer/core/conservativeRast.h \ rasterizer/core/context.h \ rasterizer/core/depthstencil.h \ rasterizer/core/fifo.hpp \ diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 22a94fbf731..cec451929a3 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -780,10 +780,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC) const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0; const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0; const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 
1 : 0; - - // currently only support 'normal' input coverage - SWR_ASSERT(psState.inputCoverage == SWR_INPUT_COVERAGE_NORMAL || - psState.inputCoverage == SWR_INPUT_COVERAGE_NONE); + const uint32_t inputCoverage = (psState.inputCoverage != SWR_INPUT_COVERAGE_NONE); SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask; @@ -795,20 +792,20 @@ void SetupPipeline(DRAW_CONTEXT *pDC) { // always need to generate I & J per sample for Z interpolation barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK); - backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage][centroid][forcedSampleCount][canEarlyZ]; + backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][inputCoverage][centroid][forcedSampleCount][canEarlyZ]; } else { // always need to generate I & J per pixel for Z interpolation barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK); - backendFuncs.pfnBackend = gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ]; + backendFuncs.pfnBackend = gBackendSingleSample[inputCoverage][centroid][canEarlyZ]; } break; case SWR_SHADING_RATE_SAMPLE: SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN); // always need to generate I & J per sample for Z interpolation barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK); - backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ]; + backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][inputCoverage][centroid][canEarlyZ]; break; default: SWR_ASSERT(0 && "Invalid shading rate"); @@ -833,7 +830,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC) break; default: pState->pfnProcessPrims = ClipTriangles; - pfnBinner = BinTriangles; + pfnBinner = 
GetBinTrianglesFunc((rastState.conservativeRast > 0)); break; }; diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 67a4c4f47bb..1a6fc6d2873 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -385,7 +385,7 @@ public: PRIMITIVE_TOPOLOGY clipTopology = TOP_UNKNOWN; if (NumVertsPerPrim == 3) { - pfnBinFunc = BinTriangles; + pfnBinFunc = GetBinTrianglesFunc((pa.pDC->pState->state.rastState.conservativeRast > 0)); clipTopology = TOP_TRIANGLE_FAN; // so that the binner knows to bloat wide points later @@ -519,7 +519,7 @@ public: pfnBinner = BinLines; break; default: - pfnBinner = BinTriangles; + pfnBinner = GetBinTrianglesFunc((pa.pDC->pState->state.rastState.conservativeRast > 0)); break; }; diff --git a/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h new file mode 100644 index 00000000000..f8aa8df76c9 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h @@ -0,0 +1,120 @@ +/**************************************************************************** +* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +* +* @file conservativeRast.h +* +******************************************************************************/ +#pragma once +#include <type_traits> +#include "common/simdintrin.h" + +enum FixedPointFmt +{ + FP_UNINIT, + _16_8, + _16_9 +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief convenience typedefs for supported Fixed Point precisions +typedef std::integral_constant<uint32_t, FP_UNINIT> Fixed_Uninit; +typedef std::integral_constant<uint32_t, _16_8> Fixed_16_8; +typedef std::integral_constant<uint32_t, _16_9> Fixed_16_9; + +////////////////////////////////////////////////////////////////////////// +/// @struct FixedPointTraits +/// @brief holds constants relating to converting between FP and Fixed point +/// @tparam FT: fixed precision type +template<typename FT> +struct FixedPointTraits{}; + +////////////////////////////////////////////////////////////////////////// +/// @brief Fixed_16_8 specialization of FixedPointTraits +template<> +struct FixedPointTraits<Fixed_16_8> +{ + /// multiplier to go from FP32 to Fixed Point 16.8 + typedef std::integral_constant<uint32_t, 256> FixedPointScaleT; + /// number of bits to shift to go from 16.8 fixed => int32 + typedef std::integral_constant<uint32_t, 8> FixedPointShiftT; + typedef Fixed_16_8 TypeT; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief Fixed_16_9 specialization of FixedPointTraits +template<> +struct FixedPointTraits<Fixed_16_9> +{ + /// 
multiplier to go from FP32 to Fixed Point 16.9 + typedef std::integral_constant<uint32_t, 512> FixedPointScaleT; + /// number of bits to shift to go from 16.9 fixed => int32 + typedef std::integral_constant<uint32_t, 9> FixedPointShiftT; + typedef Fixed_16_9 TypeT; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief convenience typedefs for conservative rasterization modes +typedef std::false_type StandardRastT; +typedef std::true_type ConservativeRastT; + +////////////////////////////////////////////////////////////////////////// +/// @brief convenience typedefs for Input Coverage rasterization modes +typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NONE> NoInputCoverageT; +typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NORMAL> OuterConservativeCoverageT; +typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE> InnerConservativeCoverageT; + +////////////////////////////////////////////////////////////////////////// +/// @struct ConservativeRastTraits +/// @brief primary ConservativeRastTraits template. 
Shouldn't be instantiated +/// @tparam ConservativeT: type of conservative rasterization +template <typename ConservativeT> +struct ConservativeRastFETraits {}; + +////////////////////////////////////////////////////////////////////////// +/// @brief StandardRast specialization of ConservativeRastTraits +template <> +struct ConservativeRastFETraits<StandardRastT> +{ + typedef std::false_type IsConservativeT; + typedef FixedPointTraits<Fixed_16_8> BBoxPrecisionT; + typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief ConservativeRastT specialization of ConservativeRastTraits +template <> +struct ConservativeRastFETraits<ConservativeRastT> +{ + typedef std::true_type IsConservativeT; + typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT; + + /// Conservative bounding box needs to expand the area around each vertex by 1/512, which + /// is the potential snapping error when going from FP-> 16.8 fixed + typedef FixedPointTraits<Fixed_16_9> BBoxPrecisionT; + typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT; + typedef std::integral_constant<uint32_t, 1> BoundingBoxShiftT; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief convenience typedefs for ConservativeRastFETraits +typedef ConservativeRastFETraits<StandardRastT> FEStandardRastT; +typedef ConservativeRastFETraits<ConservativeRastT> FEConservativeRastT; diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 03e583796de..320435281d8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -83,6 +83,8 @@ struct SWR_TRIANGLE_DESC float *pUserClipBuffer; uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES]; + uint64_t conservativeCoverageMask; + uint64_t innerConservativeCoverageMask; uint64_t anyCoveredSamples; TRI_FLAGS triFlags; diff 
--git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index f86f8faa979..7f3e33e7aa7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -33,6 +33,7 @@ #include "context.h" #include "rdtsc_core.h" #include "rasterizer.h" +#include "conservativeRast.h" #include "utils.h" #include "threads.h" #include "pa.h" @@ -1590,6 +1591,132 @@ void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask, } ////////////////////////////////////////////////////////////////////////// +/// @brief Convert the X,Y coords of a triangle to the requested Fixed +/// Point precision from FP32. +template <typename PT = FixedPointTraits<Fixed_16_8>> +INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn) +{ + simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(PT::FixedPointScaleT::value)); + return _simd_cvtps_epi32(vFixed); +} + +////////////////////////////////////////////////////////////////////////// +/// @brief Helper function to set the X,Y coords of a triangle to the +/// requested Fixed Point precision from FP32. If the RequestedT +/// FixedPointTraits precision is the same as the CurrentT, no extra +/// conversions will be done. 
If they are different, convert from FP32 +/// to the Requested precision and set vXi, vYi +/// @tparam RequestedT: requested FixedPointTraits type +/// @tparam CurrentT: FixedPointTraits type of the last +template<typename RequestedT, typename CurrentT = FixedPointTraits<Fixed_Uninit>> +struct FPToFixedPoint +{ + ////////////////////////////////////////////////////////////////////////// + /// @param tri: simdvector[3] of FP triangle verts + /// @param vXi: fixed point X coords of tri verts + /// @param vYi: fixed point Y coords of tri verts + INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]) + { + vXi[0] = fpToFixedPointVertical<RequestedT>(tri[0].x); + vYi[0] = fpToFixedPointVertical<RequestedT>(tri[0].y); + vXi[1] = fpToFixedPointVertical<RequestedT>(tri[1].x); + vYi[1] = fpToFixedPointVertical<RequestedT>(tri[1].y); + vXi[2] = fpToFixedPointVertical<RequestedT>(tri[2].x); + vYi[2] = fpToFixedPointVertical<RequestedT>(tri[2].y); + }; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief In the case where the RequestedT and CurrentT fixed point +/// precisions are the same, do nothing. +template<typename RequestedT> +struct FPToFixedPoint<RequestedT, RequestedT> +{ + INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]){}; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief Calculate bounding box for current triangle +/// @tparam CT: ConservativeRastFETraits type +/// @param vX: fixed point X position for triangle verts +/// @param vY: fixed point Y position for triangle verts +/// @param bbox: fixed point bbox +/// *Note*: expects vX, vY to be in the correct precision for the type +/// of rasterization. This avoids unnecessary FP->fixed conversions. 
+template <typename CT> +INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox){} + +////////////////////////////////////////////////////////////////////////// +/// @brief FEStandardRastT specialization of calcBoundingBoxIntVertical +template <> +INLINE void calcBoundingBoxIntVertical<FEStandardRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox) +{ + // FE conservative rast traits + typedef FEStandardRastT CT; + + static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Standard rast BBox calculation needs to be in 16.8 precision"); + // Update vXi, vYi fixed point precision for BBox calculation if necessary + FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY); + + simdscalari vMinX = vX[0]; + vMinX = _simd_min_epi32(vMinX, vX[1]); + vMinX = _simd_min_epi32(vMinX, vX[2]); + + simdscalari vMaxX = vX[0]; + vMaxX = _simd_max_epi32(vMaxX, vX[1]); + vMaxX = _simd_max_epi32(vMaxX, vX[2]); + + simdscalari vMinY = vY[0]; + vMinY = _simd_min_epi32(vMinY, vY[1]); + vMinY = _simd_min_epi32(vMinY, vY[2]); + + simdscalari vMaxY = vY[0]; + vMaxY = _simd_max_epi32(vMaxY, vY[1]); + vMaxY = _simd_max_epi32(vMaxY, vY[2]); + + bbox.left = vMinX; + bbox.right = vMaxX; + bbox.top = vMinY; + bbox.bottom = vMaxY; +} + +////////////////////////////////////////////////////////////////////////// +/// @brief FEConservativeRastT specialization of calcBoundingBoxIntVertical +/// Offsets BBox for conservative rast +template <> +INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox) +{ + // FE conservative rast traits + typedef FEConservativeRastT CT; + + static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_9>>::value, "Conservative rast BBox calculation needs to be in 16.9 precision"); + // Update vXi, vYi 
fixed point precision for BBox calculation if necessary + FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY); + + simdscalari vMinX = vX[0]; + vMinX = _simd_min_epi32(vMinX, vX[1]); + vMinX = _simd_min_epi32(vMinX, vX[2]); + + simdscalari vMaxX = vX[0]; + vMaxX = _simd_max_epi32(vMaxX, vX[1]); + vMaxX = _simd_max_epi32(vMaxX, vX[2]); + + simdscalari vMinY = vY[0]; + vMinY = _simd_min_epi32(vMinY, vY[1]); + vMinY = _simd_min_epi32(vMinY, vY[2]); + + simdscalari vMaxY = vY[0]; + vMaxY = _simd_max_epi32(vMaxY, vY[1]); + vMaxY = _simd_max_epi32(vMaxY, vY[2]); + + /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization + bbox.left = _simd_srli_epi32(_simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); + bbox.right = _simd_srli_epi32(_simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); + bbox.top = _simd_srli_epi32(_simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); + bbox.bottom = _simd_srli_epi32(_simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); +} + +////////////////////////////////////////////////////////////////////////// /// @brief Bin triangle primitives to macro tiles. Performs setup, clipping /// culling, viewport transform, etc. /// @param pDC - pointer to draw context. @@ -1597,6 +1724,8 @@ void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask, /// @param workerId - thread's worker id. Even thread has a unique id. /// @param tri - Contains triangle position data for SIMDs worth of triangles. /// @param primID - Primitive ID for each triangle. 
+/// @tparam CT - ConservativeRastFETraits +template <typename CT> void BinTriangles( DRAW_CONTEXT *pDC, PA_STATE& pa, @@ -1652,14 +1781,9 @@ void BinTriangles( tri[2].x = _simd_add_ps(tri[2].x, offset); tri[2].y = _simd_add_ps(tri[2].y, offset); - // convert to fixed point simdscalari vXi[3], vYi[3]; - vXi[0] = fpToFixedPointVertical(tri[0].x); - vYi[0] = fpToFixedPointVertical(tri[0].y); - vXi[1] = fpToFixedPointVertical(tri[1].x); - vYi[1] = fpToFixedPointVertical(tri[1].y); - vXi[2] = fpToFixedPointVertical(tri[2].x); - vYi[2] = fpToFixedPointVertical(tri[2].y); + // Set vXi, vYi to fixed point precision required for degenerate triangle check + FPToFixedPoint<typename CT::ZeroAreaPrecisionT>::Set(tri, vXi, vYi); // triangle setup simdscalari vAi[3], vBi[3]; @@ -1669,6 +1793,8 @@ void BinTriangles( simdscalari vDet[2]; calcDeterminantIntVertical(vAi, vBi, vDet); + /// todo: handle degen tri's for Conservative Rast. + // cull zero area int maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[0], _simd_setzero_si()))); int maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[1], _simd_setzero_si()))); @@ -1713,6 +1839,7 @@ void BinTriangles( RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0); } + /// Note: these variable initializations must stay above any 'goto endBinTriangles' // compute per tri backface uint32_t frontFaceMask = frontWindingTris; @@ -1726,14 +1853,13 @@ void BinTriangles( // Calc bounding box of triangles simdBBox bbox; - calcBoundingBoxIntVertical(vXi, vYi, bbox); + calcBoundingBoxIntVertical<CT>(tri, vXi, vYi, bbox); // determine if triangle falls between pixel centers and discard - // only discard for non-MSAA case + // only discard for non-MSAA case and when conservative rast is disabled // (left + 127) & ~255 // (right + 128) & ~255 - - if(rastState.sampleCount == SWR_MULTISAMPLE_1X) + if(rastState.sampleCount == SWR_MULTISAMPLE_1X && (!CT::IsConservativeT::value)) { origTriMask = 
triMask; @@ -1891,7 +2017,22 @@ endBinTriangles: RDTSC_STOP(FEBinTriangles, 1, 0); } +struct FEBinTrianglesChooser +{ + typedef PFN_PROCESS_PRIMS FuncType; + + template <typename... ArgsB> + static FuncType GetFunc() + { + return BinTriangles<ConservativeRastFETraits<ArgsB...>>; + } +}; +// Selector for correct templated Draw front-end function +PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative) +{ + return TemplateArgUnroller<FEBinTrianglesChooser>::GetFunc(IsConservative); +} ////////////////////////////////////////////////////////////////////////// /// @brief Bin SIMD points to the backend. Only supports point size of 1 diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h index dfd3987bdfb..2de5d269036 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.h +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h @@ -30,21 +30,6 @@ #include "context.h" #include <type_traits> -INLINE -__m128i fpToFixedPoint(const __m128 vIn) -{ - __m128 vFixed = _mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE)); - return _mm_cvtps_epi32(vFixed); -} - -INLINE -simdscalari fpToFixedPointVertical(const simdscalar vIn) -{ - simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(FIXED_POINT_SCALE)); - return _simd_cvtps_epi32(vFixed); -} - - // Calculates the A and B coefficients for the 3 edges of the triangle // // maths for edge equations: @@ -272,31 +257,6 @@ A = _mm_shuffle_ps(A, B, 1 0 1 0) } INLINE -void calcBoundingBoxIntVertical(const simdscalari (&vX)[3], const simdscalari (&vY)[3], simdBBox &bbox) -{ - simdscalari vMinX = vX[0]; - vMinX = _simd_min_epi32(vMinX, vX[1]); - vMinX = _simd_min_epi32(vMinX, vX[2]); - - simdscalari vMaxX = vX[0]; - vMaxX = _simd_max_epi32(vMaxX, vX[1]); - vMaxX = _simd_max_epi32(vMaxX, vX[2]); - - simdscalari vMinY = vY[0]; - vMinY = _simd_min_epi32(vMinY, vY[1]); - vMinY = _simd_min_epi32(vMinY, vY[2]); - - simdscalari vMaxY = vY[0]; - vMaxY = _simd_max_epi32(vMaxY, 
vY[1]); - vMaxY = _simd_max_epi32(vMaxY, vY[2]); - - bbox.left = vMinX; - bbox.right = vMaxX; - bbox.top = vMinY; - bbox.bottom = vMaxY; -} - -INLINE bool CanUseSimplePoints(DRAW_CONTEXT *pDC) { const API_STATE& state = GetApiState(pDC); @@ -334,8 +294,9 @@ void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uin void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); void ProcessQueryStats(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData); +PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative); + struct PA_STATE_BASE; // forward decl -void BinTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector tri[3], uint32_t primMask, simdscalari primID); void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID); void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID); diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h index bcfeef48410..d3faf2aa6c9 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h @@ -28,8 +28,16 @@ #pragma once #include "context.h" +#include <type_traits> extern PFN_WORK_FUNC gRasterizerTable[2][SWR_MULTISAMPLE_TYPE_MAX]; void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData); void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData); void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData); + +INLINE +__m128i fpToFixedPoint(const __m128 vIn) +{ + __m128 vFixed = _mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE)); + return _mm_cvtps_epi32(vFixed); +}
\ No newline at end of file diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 5156c6b1322..05735b3f32b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -909,6 +909,7 @@ struct SWR_RASTSTATE uint32_t forcedSampleCount : 1; uint32_t pixelOffset : 1; uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units + uint32_t conservativeRast : 1; float pointSize; float lineWidth; @@ -989,6 +990,7 @@ enum SWR_INPUT_COVERAGE { SWR_INPUT_COVERAGE_NONE, SWR_INPUT_COVERAGE_NORMAL, + SWR_INPUT_COVERAGE_INNER_CONSERVATIVE, SWR_INPUT_COVERAGE_MAX, }; @@ -1016,7 +1018,7 @@ struct SWR_PS_STATE // dword 2 uint32_t killsPixel : 1; // pixel shader can kill pixels - uint32_t inputCoverage : 1; // type of input coverage PS uses + uint32_t inputCoverage : 1; // ps uses input coverage uint32_t writesODepth : 1; // pixel shader writes to depth uint32_t usesSourceDepth : 1; // pixel shader reads depth uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h index 2853f98fd78..f1b028ec681 100644 --- a/src/gallium/drivers/swr/rasterizer/core/utils.h +++ b/src/gallium/drivers/swr/rasterizer/core/utils.h @@ -849,6 +849,36 @@ struct TemplateArgUnroller return TemplateArgUnroller<TermT, ArgsB..., std::false_type>::GetFunc(remainingArgs...); } + + // Last Arg Terminator + template <typename... 
TArgsT> + static typename TermT::FuncType GetFunc(uint32_t iArg) + { + switch(iArg) + { + case 0: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 0>>(); + case 1: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 1>>(); + case 2: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 2>>(); + case 3: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 3>>(); + case 4: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 4>>(); + default: SWR_ASSUME(false); return nullptr; + } + } + + // Recursively parse args + template <typename... TArgsT> + static typename TermT::FuncType GetFunc(uint32_t iArg, TArgsT... remainingArgs) + { + switch(iArg) + { + case 0: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 0>>::GetFunc(remainingArgs...); + case 1: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 1>>::GetFunc(remainingArgs...); + case 2: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 2>>::GetFunc(remainingArgs...); + case 3: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 3>>::GetFunc(remainingArgs...); + case 4: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 4>>::GetFunc(remainingArgs...); + default: SWR_ASSUME(false); return nullptr; + } + } }; |