summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tim Rowley <[email protected]> 2016-06-10 11:31:16 -0600
committer Tim Rowley <[email protected]> 2016-06-23 10:50:41 -0500
commit c7cd33b605f0238464a3250a11f7134e4b7d22a6 (patch)
tree 6759d9e5be116763d34943271236e515eaf0700f
parent c867c22d855163ecbf18d5606b27c9d2cb50a148 (diff)
swr: [rasterizer core] conservative rasterization frontend support
Reviewed-by: Bruce Cherniak <[email protected]>
-rw-r--r-- src/gallium/drivers/swr/Makefile.sources 1
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/api.cpp 13
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.h 4
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/conservativeRast.h 120
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/context.h 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.cpp 163
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.h 43
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/rasterizer.h 8
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/state.h 4
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/utils.h 30
10 files changed, 325 insertions, 63 deletions
diff --git a/src/gallium/drivers/swr/Makefile.sources b/src/gallium/drivers/swr/Makefile.sources
index f9448eec6c6..8d97a75c68b 100644
--- a/src/gallium/drivers/swr/Makefile.sources
+++ b/src/gallium/drivers/swr/Makefile.sources
@@ -67,6 +67,7 @@ CORE_CXX_SOURCES := \
rasterizer/core/blend.h \
rasterizer/core/clip.cpp \
rasterizer/core/clip.h \
+ rasterizer/core/conservativeRast.h \
rasterizer/core/context.h \
rasterizer/core/depthstencil.h \
rasterizer/core/fifo.hpp \
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 22a94fbf731..cec451929a3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -780,10 +780,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
-
- // currently only support 'normal' input coverage
- SWR_ASSERT(psState.inputCoverage == SWR_INPUT_COVERAGE_NORMAL ||
- psState.inputCoverage == SWR_INPUT_COVERAGE_NONE);
+ const uint32_t inputCoverage = (psState.inputCoverage != SWR_INPUT_COVERAGE_NONE);
SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
@@ -795,20 +792,20 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
{
// always need to generate I & J per sample for Z interpolation
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
- backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage][centroid][forcedSampleCount][canEarlyZ];
+ backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][inputCoverage][centroid][forcedSampleCount][canEarlyZ];
}
else
{
// always need to generate I & J per pixel for Z interpolation
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK);
- backendFuncs.pfnBackend = gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ];
+ backendFuncs.pfnBackend = gBackendSingleSample[inputCoverage][centroid][canEarlyZ];
}
break;
case SWR_SHADING_RATE_SAMPLE:
SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN);
// always need to generate I & J per sample for Z interpolation
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
- backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];
+ backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][inputCoverage][centroid][canEarlyZ];
break;
default:
SWR_ASSERT(0 && "Invalid shading rate");
@@ -833,7 +830,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
break;
default:
pState->pfnProcessPrims = ClipTriangles;
- pfnBinner = BinTriangles;
+ pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0));
break;
};
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 67a4c4f47bb..1a6fc6d2873 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -385,7 +385,7 @@ public:
PRIMITIVE_TOPOLOGY clipTopology = TOP_UNKNOWN;
if (NumVertsPerPrim == 3)
{
- pfnBinFunc = BinTriangles;
+ pfnBinFunc = GetBinTrianglesFunc((pa.pDC->pState->state.rastState.conservativeRast > 0));
clipTopology = TOP_TRIANGLE_FAN;
// so that the binner knows to bloat wide points later
@@ -519,7 +519,7 @@ public:
pfnBinner = BinLines;
break;
default:
- pfnBinner = BinTriangles;
+ pfnBinner = GetBinTrianglesFunc((pa.pDC->pState->state.rastState.conservativeRast > 0));
break;
};
diff --git a/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
new file mode 100644
index 00000000000..f8aa8df76c9
--- /dev/null
+++ b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
@@ -0,0 +1,120 @@
+/****************************************************************************
+* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*
+* @file conservativeRast.h
+*
+******************************************************************************/
+#pragma once
+#include <type_traits>
+#include "common/simdintrin.h"
+
+enum FixedPointFmt ///< identifiers for the fixed point formats wrapped by the Fixed_* typedefs
+{
+ FP_UNINIT, ///< no format; stands for "nothing has been converted yet"
+ _16_8, ///< 16.8 fixed point
+ _16_9 ///< 16.9 fixed point
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief convenience typedefs for supported Fixed Point precisions
+typedef std::integral_constant<uint32_t, FP_UNINIT> Fixed_Uninit;
+typedef std::integral_constant<uint32_t, _16_8> Fixed_16_8;
+typedef std::integral_constant<uint32_t, _16_9> Fixed_16_9;
+
+//////////////////////////////////////////////////////////////////////////
+/// @struct FixedPointTraits
+/// @brief holds constants relating to converting between FP and Fixed point. The primary template is empty; the constants live in the specializations
+/// @tparam FT: fixed precision type
+template<typename FT>
+struct FixedPointTraits{};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Fixed_16_8 specialization of FixedPointTraits
+template<>
+struct FixedPointTraits<Fixed_16_8>
+{
+ /// multiplier to go from FP32 to Fixed Point 16.8 (2^8)
+ typedef std::integral_constant<uint32_t, 256> FixedPointScaleT;
+ /// number of bits to shift to go from 16.8 fixed => int32
+ typedef std::integral_constant<uint32_t, 8> FixedPointShiftT;
+ typedef Fixed_16_8 TypeT; ///< format tag this specialization describes
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Fixed_16_9 specialization of FixedPointTraits
+template<>
+struct FixedPointTraits<Fixed_16_9>
+{
+ /// multiplier to go from FP32 to Fixed Point 16.9 (2^9)
+ typedef std::integral_constant<uint32_t, 512> FixedPointScaleT;
+ /// number of bits to shift to go from 16.9 fixed => int32
+ typedef std::integral_constant<uint32_t, 9> FixedPointShiftT;
+ typedef Fixed_16_9 TypeT; ///< format tag this specialization describes
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief convenience typedefs for conservative rasterization modes
+typedef std::false_type StandardRastT;
+typedef std::true_type ConservativeRastT;
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief convenience typedefs for Input Coverage rasterization modes
+typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NONE> NoInputCoverageT;
+typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NORMAL> OuterConservativeCoverageT;
+typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE> InnerConservativeCoverageT;
+
+//////////////////////////////////////////////////////////////////////////
+/// @struct ConservativeRastFETraits
+/// @brief primary ConservativeRastFETraits template. Shouldn't be instantiated
+/// @tparam ConservativeT: type of conservative rasterization
+template <typename ConservativeT>
+struct ConservativeRastFETraits {};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief StandardRast specialization of ConservativeRastFETraits
+template <>
+struct ConservativeRastFETraits<StandardRastT>
+{
+ typedef std::false_type IsConservativeT;
+ typedef FixedPointTraits<Fixed_16_8> BBoxPrecisionT; ///< precision the bounding box is computed in
+ typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT; ///< precision used for the degenerate (zero area) triangle check
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief ConservativeRastT specialization of ConservativeRastFETraits
+template <>
+struct ConservativeRastFETraits<ConservativeRastT>
+{
+ typedef std::true_type IsConservativeT;
+ typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT; ///< precision used for the degenerate (zero area) triangle check
+
+ /// Conservative bounding box needs to expand the area around each vertex by 1/512, which
+ /// is the potential snapping error when going from FP-> 16.8 fixed
+ typedef FixedPointTraits<Fixed_16_9> BBoxPrecisionT;
+ typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT; ///< amount (in 16.9 units, i.e. 1/512) the bbox is grown by on each side
+ typedef std::integral_constant<uint32_t, 1> BoundingBoxShiftT; ///< right shift that brings the grown 16.9 bbox back to 16.8
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief convenience typedefs for ConservativeRastFETraits
+typedef ConservativeRastFETraits<StandardRastT> FEStandardRastT;
+typedef ConservativeRastFETraits<ConservativeRastT> FEConservativeRastT;
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 03e583796de..320435281d8 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -83,6 +83,8 @@ struct SWR_TRIANGLE_DESC
float *pUserClipBuffer;
uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES];
+ uint64_t conservativeCoverageMask;
+ uint64_t innerConservativeCoverageMask;
uint64_t anyCoveredSamples;
TRI_FLAGS triFlags;
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index f86f8faa979..7f3e33e7aa7 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -33,6 +33,7 @@
#include "context.h"
#include "rdtsc_core.h"
#include "rasterizer.h"
+#include "conservativeRast.h"
#include "utils.h"
#include "threads.h"
#include "pa.h"
@@ -1590,6 +1591,132 @@ void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask,
}
//////////////////////////////////////////////////////////////////////////
+/// @brief Convert one SIMD register of FP32 values (e.g. the X or Y coords of a triangle vert) to the requested
+/// Fixed Point precision: scales by PT::FixedPointScaleT and converts to int32. @tparam PT: FixedPointTraits type, 16.8 by default
+template <typename PT = FixedPointTraits<Fixed_16_8>>
+INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn)
+{
+ simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(PT::FixedPointScaleT::value));
+ return _simd_cvtps_epi32(vFixed);
+}
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Helper function to set the X,Y coords of a triangle to the
+/// requested Fixed Point precision from FP32. If the RequestedT
+/// FixedPointTraits precision is the same as the CurrentT, no extra
+/// conversions will be done. If they are different, convert from FP32
+/// to the Requested precision and set vXi, vYi
+/// @tparam RequestedT: requested FixedPointTraits type
+/// @tparam CurrentT: FixedPointTraits type vXi, vYi are currently in; defaults to uninitialized, which always converts
+template<typename RequestedT, typename CurrentT = FixedPointTraits<Fixed_Uninit>>
+struct FPToFixedPoint
+{
+ //////////////////////////////////////////////////////////////////////////
+ /// @param tri: simdvector[3] of FP triangle verts
+ /// @param vXi: fixed point X coords of tri verts (overwritten)
+ /// @param vYi: fixed point Y coords of tri verts (overwritten)
+ INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3])
+ {
+ vXi[0] = fpToFixedPointVertical<RequestedT>(tri[0].x);
+ vYi[0] = fpToFixedPointVertical<RequestedT>(tri[0].y);
+ vXi[1] = fpToFixedPointVertical<RequestedT>(tri[1].x);
+ vYi[1] = fpToFixedPointVertical<RequestedT>(tri[1].y);
+ vXi[2] = fpToFixedPointVertical<RequestedT>(tri[2].x);
+ vYi[2] = fpToFixedPointVertical<RequestedT>(tri[2].y);
+ };
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief In the case where the RequestedT and CurrentT fixed point
+/// precisions are the same, do nothing: Set has an empty body and leaves vXi, vYi untouched.
+template<typename RequestedT>
+struct FPToFixedPoint<RequestedT, RequestedT>
+{
+ INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]){};
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Calculate bounding box for current triangle. The primary template has an empty body; the specializations do the work
+/// @tparam CT: ConservativeRastFETraits type
+/// @param tri: FP triangle verts, used when vX, vY have to be re-converted to CT::BBoxPrecisionT
+/// @param vX: fixed point X position for triangle verts
+/// @param vY: fixed point Y position for triangle verts
+/// @param bbox: fixed point bbox
+/// *Note*: expects vX, vY in CT::ZeroAreaPrecisionT on entry; they are only re-converted if CT::BBoxPrecisionT differs.
+template <typename CT>
+INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox){}
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief FEStandardRastT specialization of calcBoundingBoxIntVertical: bbox is the per-component min/max of the three verts
+template <>
+INLINE void calcBoundingBoxIntVertical<FEStandardRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
+{
+ // FE standard rast traits
+ typedef FEStandardRastT CT;
+
+ static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Standard rast BBox calculation needs to be in 16.8 precision");
+ // BBoxPrecisionT and ZeroAreaPrecisionT are both 16.8 here, so this picks the do-nothing Set and vX, vY are used as passed in
+ FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);
+
+ simdscalari vMinX = vX[0];
+ vMinX = _simd_min_epi32(vMinX, vX[1]);
+ vMinX = _simd_min_epi32(vMinX, vX[2]);
+
+ simdscalari vMaxX = vX[0];
+ vMaxX = _simd_max_epi32(vMaxX, vX[1]);
+ vMaxX = _simd_max_epi32(vMaxX, vX[2]);
+
+ simdscalari vMinY = vY[0];
+ vMinY = _simd_min_epi32(vMinY, vY[1]);
+ vMinY = _simd_min_epi32(vMinY, vY[2]);
+
+ simdscalari vMaxY = vY[0];
+ vMaxY = _simd_max_epi32(vMaxY, vY[1]);
+ vMaxY = _simd_max_epi32(vMaxY, vY[2]);
+
+ bbox.left = vMinX;
+ bbox.right = vMaxX;
+ bbox.top = vMinY;
+ bbox.bottom = vMaxY;
+}
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief FEConservativeRastT specialization of calcBoundingBoxIntVertical
+/// Re-snaps the verts to 16.9, grows the bbox by 1/512 per side and returns it in 16.8. vX, vY are overwritten with the 16.9 coords.
+template <>
+INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox)
+{
+ // FE conservative rast traits
+ typedef FEConservativeRastT CT;
+ static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_9>>::value, "Conservative rast BBox calculation needs to be in 16.9 precision");
+ // vX, vY arrive in ZeroAreaPrecisionT (16.8); BBoxPrecisionT is 16.9, so they are re-converted from the FP verts in tri
+ FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY);
+
+ simdscalari vMinX = vX[0];
+ vMinX = _simd_min_epi32(vMinX, vX[1]);
+ vMinX = _simd_min_epi32(vMinX, vX[2]);
+
+ simdscalari vMaxX = vX[0];
+ vMaxX = _simd_max_epi32(vMaxX, vX[1]);
+ vMaxX = _simd_max_epi32(vMaxX, vX[2]);
+
+ simdscalari vMinY = vY[0];
+ vMinY = _simd_min_epi32(vMinY, vY[1]);
+ vMinY = _simd_min_epi32(vMinY, vY[2]);
+
+ simdscalari vMaxY = vY[0];
+ vMaxY = _simd_max_epi32(vMaxY, vY[1]);
+ vMaxY = _simd_max_epi32(vMaxY, vY[2]);
+
+ /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
+ /// The coords are signed, so the shift must be arithmetic: a logical shift turns a negative min (vert left of / above the origin) into a huge positive value
+ bbox.left = _simd_srai_epi32(_simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
+ bbox.right = _simd_srai_epi32(_simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
+ bbox.top = _simd_srai_epi32(_simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
+ bbox.bottom = _simd_srai_epi32(_simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value);
+}
+
+//////////////////////////////////////////////////////////////////////////
/// @brief Bin triangle primitives to macro tiles. Performs setup, clipping
/// culling, viewport transform, etc.
/// @param pDC - pointer to draw context.
@@ -1597,6 +1724,8 @@ void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask,
/// @param workerId - thread's worker id. Even thread has a unique id.
/// @param tri - Contains triangle position data for SIMDs worth of triangles.
/// @param primID - Primitive ID for each triangle.
+/// @tparam CT - ConservativeRastFETraits
+template <typename CT>
void BinTriangles(
DRAW_CONTEXT *pDC,
PA_STATE& pa,
@@ -1652,14 +1781,9 @@ void BinTriangles(
tri[2].x = _simd_add_ps(tri[2].x, offset);
tri[2].y = _simd_add_ps(tri[2].y, offset);
- // convert to fixed point
simdscalari vXi[3], vYi[3];
- vXi[0] = fpToFixedPointVertical(tri[0].x);
- vYi[0] = fpToFixedPointVertical(tri[0].y);
- vXi[1] = fpToFixedPointVertical(tri[1].x);
- vYi[1] = fpToFixedPointVertical(tri[1].y);
- vXi[2] = fpToFixedPointVertical(tri[2].x);
- vYi[2] = fpToFixedPointVertical(tri[2].y);
+ // Set vXi, vYi to fixed point precision required for degenerate triangle check
+ FPToFixedPoint<typename CT::ZeroAreaPrecisionT>::Set(tri, vXi, vYi);
// triangle setup
simdscalari vAi[3], vBi[3];
@@ -1669,6 +1793,8 @@ void BinTriangles(
simdscalari vDet[2];
calcDeterminantIntVertical(vAi, vBi, vDet);
+ /// todo: handle degen tri's for Conservative Rast.
+
// cull zero area
int maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[0], _simd_setzero_si())));
int maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[1], _simd_setzero_si())));
@@ -1713,6 +1839,7 @@ void BinTriangles(
RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0);
}
+ /// Note: these variable initializations must stay above any 'goto endBinTriangles'
// compute per tri backface
uint32_t frontFaceMask = frontWindingTris;
@@ -1726,14 +1853,13 @@ void BinTriangles(
// Calc bounding box of triangles
simdBBox bbox;
- calcBoundingBoxIntVertical(vXi, vYi, bbox);
+ calcBoundingBoxIntVertical<CT>(tri, vXi, vYi, bbox);
// determine if triangle falls between pixel centers and discard
- // only discard for non-MSAA case
+ // only discard for non-MSAA case and when conservative rast is disabled
// (left + 127) & ~255
// (right + 128) & ~255
-
- if(rastState.sampleCount == SWR_MULTISAMPLE_1X)
+ if(rastState.sampleCount == SWR_MULTISAMPLE_1X && (!CT::IsConservativeT::value))
{
origTriMask = triMask;
@@ -1891,7 +2017,22 @@ endBinTriangles:
RDTSC_STOP(FEBinTriangles, 1, 0);
}
+struct FEBinTrianglesChooser // terminator type handed to TemplateArgUnroller to pick a BinTriangles instantiation
+{
+ typedef PFN_PROCESS_PRIMS FuncType;
+
+ template <typename... ArgsB>
+ static FuncType GetFunc() // ArgsB... are forwarded as the ConservativeRastFETraits template arguments
+ {
+ return BinTriangles<ConservativeRastFETraits<ArgsB...>>;
+ }
+};
+// Selector for the templated BinTriangles front-end function matching the conservative rasterization setting
+PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative)
+{
+ return TemplateArgUnroller<FEBinTrianglesChooser>::GetFunc(IsConservative);
+}
//////////////////////////////////////////////////////////////////////////
/// @brief Bin SIMD points to the backend. Only supports point size of 1
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index dfd3987bdfb..2de5d269036 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -30,21 +30,6 @@
#include "context.h"
#include <type_traits>
-INLINE
-__m128i fpToFixedPoint(const __m128 vIn)
-{
- __m128 vFixed = _mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE));
- return _mm_cvtps_epi32(vFixed);
-}
-
-INLINE
-simdscalari fpToFixedPointVertical(const simdscalar vIn)
-{
- simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(FIXED_POINT_SCALE));
- return _simd_cvtps_epi32(vFixed);
-}
-
-
// Calculates the A and B coefficients for the 3 edges of the triangle
//
// maths for edge equations:
@@ -272,31 +257,6 @@ A = _mm_shuffle_ps(A, B, 1 0 1 0)
}
INLINE
-void calcBoundingBoxIntVertical(const simdscalari (&vX)[3], const simdscalari (&vY)[3], simdBBox &bbox)
-{
- simdscalari vMinX = vX[0];
- vMinX = _simd_min_epi32(vMinX, vX[1]);
- vMinX = _simd_min_epi32(vMinX, vX[2]);
-
- simdscalari vMaxX = vX[0];
- vMaxX = _simd_max_epi32(vMaxX, vX[1]);
- vMaxX = _simd_max_epi32(vMaxX, vX[2]);
-
- simdscalari vMinY = vY[0];
- vMinY = _simd_min_epi32(vMinY, vY[1]);
- vMinY = _simd_min_epi32(vMinY, vY[2]);
-
- simdscalari vMaxY = vY[0];
- vMaxY = _simd_max_epi32(vMaxY, vY[1]);
- vMaxY = _simd_max_epi32(vMaxY, vY[2]);
-
- bbox.left = vMinX;
- bbox.right = vMaxX;
- bbox.top = vMinY;
- bbox.bottom = vMaxY;
-}
-
-INLINE
bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
{
const API_STATE& state = GetApiState(pDC);
@@ -334,8 +294,9 @@ void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uin
void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessQueryStats(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
+PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
+
struct PA_STATE_BASE; // forward decl
-void BinTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector tri[3], uint32_t primMask, simdscalari primID);
void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
index bcfeef48410..d3faf2aa6c9 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
@@ -28,8 +28,16 @@
#pragma once
#include "context.h"
+#include <type_traits>
extern PFN_WORK_FUNC gRasterizerTable[2][SWR_MULTISAMPLE_TYPE_MAX];
void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
+
+INLINE
+__m128i fpToFixedPoint(const __m128 vIn)
+{
+ __m128 vFixed = _mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE));
+ return _mm_cvtps_epi32(vFixed);
+}
\ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 5156c6b1322..05735b3f32b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -909,6 +909,7 @@ struct SWR_RASTSTATE
uint32_t forcedSampleCount : 1;
uint32_t pixelOffset : 1;
uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
+ uint32_t conservativeRast : 1;
float pointSize;
float lineWidth;
@@ -989,6 +990,7 @@ enum SWR_INPUT_COVERAGE
{
SWR_INPUT_COVERAGE_NONE,
SWR_INPUT_COVERAGE_NORMAL,
+ SWR_INPUT_COVERAGE_INNER_CONSERVATIVE,
SWR_INPUT_COVERAGE_MAX,
};
@@ -1016,7 +1018,7 @@ struct SWR_PS_STATE
// dword 2
uint32_t killsPixel : 1; // pixel shader can kill pixels
- uint32_t inputCoverage : 1; // type of input coverage PS uses
+ uint32_t inputCoverage : 1; // ps uses input coverage
uint32_t writesODepth : 1; // pixel shader writes to depth
uint32_t usesSourceDepth : 1; // pixel shader reads depth
uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h
index 2853f98fd78..f1b028ec681 100644
--- a/src/gallium/drivers/swr/rasterizer/core/utils.h
+++ b/src/gallium/drivers/swr/rasterizer/core/utils.h
@@ -849,6 +849,36 @@ struct TemplateArgUnroller
return TemplateArgUnroller<TermT, ArgsB..., std::false_type>::GetFunc(remainingArgs...);
}
+
+ // Last Arg Terminator for a uint32_t arg: appends std::integral_constant<uint32_t, iArg> and asks TermT for the function; only 0-4 are handled
+ template <typename... TArgsT>
+ static typename TermT::FuncType GetFunc(uint32_t iArg)
+ {
+ switch(iArg)
+ {
+ case 0: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 0>>();
+ case 1: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 1>>();
+ case 2: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 2>>();
+ case 3: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 3>>();
+ case 4: return TermT::template GetFunc<ArgsB..., std::integral_constant<uint32_t, 4>>();
+ default: SWR_ASSUME(false); return nullptr; // values above 4 are not supported
+ }
+ }
+
+ // Recursively parse args: appends std::integral_constant<uint32_t, iArg> for this arg, then unrolls remainingArgs; only 0-4 are handled
+ template <typename... TArgsT>
+ static typename TermT::FuncType GetFunc(uint32_t iArg, TArgsT... remainingArgs)
+ {
+ switch(iArg)
+ {
+ case 0: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 0>>::GetFunc(remainingArgs...);
+ case 1: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 1>>::GetFunc(remainingArgs...);
+ case 2: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 2>>::GetFunc(remainingArgs...);
+ case 3: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 3>>::GetFunc(remainingArgs...);
+ case 4: return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<uint32_t, 4>>::GetFunc(remainingArgs...);
+ default: SWR_ASSUME(false); return nullptr; // values above 4 are not supported
+ }
+ }
};