diff options
Diffstat (limited to 'src/gallium/drivers/swr')
5 files changed, 332 insertions, 144 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h index ca4c19ec90d..1bc3938595c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h +++ b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h @@ -109,8 +109,6 @@ template <> struct ConservativeRastFETraits<StandardRastT> { typedef std::false_type IsConservativeT; - typedef FixedPointTraits<Fixed_16_8> BBoxPrecisionT; - typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT; }; ////////////////////////////////////////////////////////////////////////// @@ -119,13 +117,7 @@ template <> struct ConservativeRastFETraits<ConservativeRastT> { typedef std::true_type IsConservativeT; - typedef FixedPointTraits<Fixed_16_8> ZeroAreaPrecisionT; - - /// Conservative bounding box needs to expand the area around each vertex by 1/512, which - /// is the potential snapping error when going from FP-> 16.8 fixed - typedef FixedPointTraits<Fixed_16_9> BBoxPrecisionT; typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT; - typedef std::integral_constant<uint32_t, 1> BoundingBoxShiftT; }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index cb13b50df14..c797c042d1c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -1446,7 +1446,7 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc( /// @param pLinkageMap - maps VS attribute slot to PS slot /// @param triIndex - Triangle to process attributes for /// @param pBuffer - Output result -template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT> +template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT, typename IsDegenerate> INLINE void ProcessAttributes( DRAW_CONTEXT *pDC, PA_STATE&pa, @@ -1456,7 +1456,8 @@ INLINE void ProcessAttributes( { static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT"); const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState; - LONG constantInterpMask = backendState.constantInterpolationMask; + // Conservative Rasterization requires degenerate tris to have constant attribute interpolation + LONG constantInterpMask = IsDegenerate::value ? 0xFFFFFFFF : backendState.constantInterpolationMask; const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex; const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology; @@ -1483,7 +1484,7 @@ INLINE void ProcessAttributes( __m128 attrib[3]; // triangle attribs (always 4 wide) float* pAttribStart = pBuffer; - if (HasConstantInterpT::value) + if (HasConstantInterpT::value || IsDegenerate::value) { if (_bittest(&constantInterpMask, i)) { @@ -1605,9 +1606,9 @@ struct ProcessAttributesChooser } }; -PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp) +PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp, bool IsDegenerate = false) { - return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp); + return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp, IsDegenerate); } ////////////////////////////////////////////////////////////////////////// @@ -1668,38 +1669,19 @@ INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn) ////////////////////////////////////////////////////////////////////////// /// @brief Helper function to set the X,Y coords of a triangle to the -/// requested Fixed Point precision from FP32. If the RequestedT -/// FixedPointTraits precision is the same as the CurrentT, no extra -/// conversions will be done. If they are different, convert from FP32 -/// to the Requested precision and set vXi, vYi -/// @tparam RequestedT: requested FixedPointTraits type -/// @tparam CurrentT: FixedPointTraits type of the last -template<typename RequestedT, typename CurrentT = FixedPointTraits<Fixed_Uninit>> -struct FPToFixedPoint +/// requested Fixed Point precision from FP32. +/// @param tri: simdvector[3] of FP triangle verts +/// @param vXi: fixed point X coords of tri verts +/// @param vYi: fixed point Y coords of tri verts +INLINE static void FPToFixedPoint(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]) { - ////////////////////////////////////////////////////////////////////////// - /// @param tri: simdvector[3] of FP triangle verts - /// @param vXi: fixed point X coords of tri verts - /// @param vYi: fixed point Y coords of tri verts - INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]) - { - vXi[0] = fpToFixedPointVertical<RequestedT>(tri[0].x); - vYi[0] = fpToFixedPointVertical<RequestedT>(tri[0].y); - vXi[1] = fpToFixedPointVertical<RequestedT>(tri[1].x); - vYi[1] = fpToFixedPointVertical<RequestedT>(tri[1].y); - vXi[2] = fpToFixedPointVertical<RequestedT>(tri[2].x); - vYi[2] = fpToFixedPointVertical<RequestedT>(tri[2].y); - }; -}; - -////////////////////////////////////////////////////////////////////////// -/// @brief In the case where the RequestedT and CurrentT fixed point -/// precisions are the same, do nothing. -template<typename RequestedT> -struct FPToFixedPoint<RequestedT, RequestedT> -{ - INLINE static void Set(const simdvector * const tri, simdscalari (&vXi)[3], simdscalari (&vYi)[3]){}; -}; + vXi[0] = fpToFixedPointVertical(tri[0].x); + vYi[0] = fpToFixedPointVertical(tri[0].y); + vXi[1] = fpToFixedPointVertical(tri[1].x); + vYi[1] = fpToFixedPointVertical(tri[1].y); + vXi[2] = fpToFixedPointVertical(tri[2].x); + vYi[2] = fpToFixedPointVertical(tri[2].y); +} ////////////////////////////////////////////////////////////////////////// /// @brief Calculate bounding box for current triangle @@ -1710,20 +1692,8 @@ struct FPToFixedPoint<RequestedT, RequestedT> /// *Note*: expects vX, vY to be in the correct precision for the type /// of rasterization. This avoids unnecessary FP->fixed conversions. template <typename CT> -INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox){} - -////////////////////////////////////////////////////////////////////////// -/// @brief FEStandardRastT specialization of calcBoundingBoxIntVertical -template <> -INLINE void calcBoundingBoxIntVertical<FEStandardRastT>(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox) +INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari (&vX)[3], simdscalari (&vY)[3], simdBBox &bbox) { - // FE conservative rast traits - typedef FEStandardRastT CT; - - static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Standard rast BBox calculation needs to be in 16.8 precision"); - // Update vXi, vYi fixed point precision for BBox calculation if necessary - FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY); - simdscalari vMinX = vX[0]; vMinX = _simd_min_epi32(vMinX, vX[1]); vMinX = _simd_min_epi32(vMinX, vX[2]); @@ -1755,10 +1725,6 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c // FE conservative rast traits typedef FEConservativeRastT CT; - static_assert(std::is_same<CT::BBoxPrecisionT, FixedPointTraits<Fixed_16_9>>::value, "Conservative rast BBox calculation needs to be in 16.9 precision"); - // Update vXi, vYi fixed point precision for BBox calculation if necessary - FPToFixedPoint<CT::BBoxPrecisionT, CT::ZeroAreaPrecisionT>::Set(tri, vX, vY); - simdscalari vMinX = vX[0]; vMinX = _simd_min_epi32(vMinX, vX[1]); vMinX = _simd_min_epi32(vMinX, vX[2]); @@ -1776,10 +1742,11 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c vMaxY = _simd_max_epi32(vMaxY, vY[2]); /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization - bbox.left = _simd_srli_epi32(_simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); - bbox.right = _simd_srli_epi32(_simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); - bbox.top = _simd_srli_epi32(_simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); - bbox.bottom = _simd_srli_epi32(_simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)), CT::BoundingBoxShiftT::value); + /// expand bbox by 1/256; coverage will be correctly handled in the rasterizer. + bbox.left = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); + bbox.right = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); + bbox.top = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); + bbox.bottom = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); } ////////////////////////////////////////////////////////////////////////// @@ -1808,10 +1775,6 @@ void BinTriangles( const SWR_GS_STATE& gsState = state.gsState; MacroTileMgr *pTileMgr = pDC->pTileMgr; - // Select attribute processor - PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3, - state.backendState.swizzleEnable, state.backendState.constantInterpolationMask); - simdscalar vRecipW0 = _simd_set1_ps(1.0f); simdscalar vRecipW1 = _simd_set1_ps(1.0f); @@ -1852,8 +1815,8 @@ void BinTriangles( tri[2].y = _simd_add_ps(tri[2].y, offset); simdscalari vXi[3], vYi[3]; - // Set vXi, vYi to fixed point precision required for degenerate triangle check - FPToFixedPoint<typename CT::ZeroAreaPrecisionT>::Set(tri, vXi, vYi); + // Set vXi, vYi to required fixed point precision + FPToFixedPoint(tri, vXi, vYi); // triangle setup simdscalari vAi[3], vBi[3]; @@ -1863,8 +1826,6 @@ void BinTriangles( simdscalari vDet[2]; calcDeterminantIntVertical(vAi, vBi, vDet); - /// todo: handle degen tri's for Conservative Rast. - // cull zero area int maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[0], _simd_setzero_si()))); int maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpeq_epi64(vDet[1], _simd_setzero_si()))); @@ -1872,11 +1833,15 @@ void BinTriangles( int cullZeroAreaMask = maskLo | (maskHi << (KNOB_SIMD_WIDTH / 2)); uint32_t origTriMask = triMask; - triMask &= ~cullZeroAreaMask; + // don't cull degenerate triangles if we're conservatively rasterizing + if(!CT::IsConservativeT::value) + { + triMask &= ~cullZeroAreaMask; + } // determine front winding tris // CW +det - // CCW -det + // CCW det <= 0; 0 area triangles are marked as backfacing, which is required behavior for conservative rast maskLo = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpgt_epi64(vDet[0], _simd_setzero_si()))); maskHi = _simd_movemask_pd(_simd_castsi_pd(_simd_cmpgt_epi64(vDet[1], _simd_setzero_si()))); int cwTriMask = maskLo | (maskHi << (KNOB_SIMD_WIDTH /2) ); @@ -1898,6 +1863,7 @@ void BinTriangles( case SWR_CULLMODE_BOTH: cullTris = 0xffffffff; break; case SWR_CULLMODE_NONE: cullTris = 0x0; break; case SWR_CULLMODE_FRONT: cullTris = frontWindingTris; break; + // 0 area triangles are marked as backfacing, which is required behavior for conservative rast case SWR_CULLMODE_BACK: cullTris = ~frontWindingTris; break; default: SWR_ASSERT(false, "Invalid cull mode: %d", rastState.cullMode); cullTris = 0x0; break; } @@ -1916,9 +1882,53 @@ void BinTriangles( DWORD triIndex = 0; // for center sample pattern, all samples are at pixel center; calculate coverage // once at center and broadcast the results in the backend - uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X; - PFN_WORK_FUNC pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0), - pDC->pState->state.psState.inputCoverage, (rastState.scissorEnable > 0)); + const SWR_MULTISAMPLE_COUNT sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X; + uint32_t edgeEnable; + PFN_WORK_FUNC pfnWork; + if(CT::IsConservativeT::value) + { + // determine which edges of the degenerate tri, if any, are valid to rasterize. + // used to call the appropriate templated rasterizer function + if(cullZeroAreaMask > 0) + { + // e0 = v1-v0 + simdscalari x0x1Mask = _simd_cmpeq_epi32(vXi[0], vXi[1]); + simdscalari y0y1Mask = _simd_cmpeq_epi32(vYi[0], vYi[1]); + uint32_t e0Mask = _simd_movemask_ps(_simd_castsi_ps(_simd_and_si(x0x1Mask, y0y1Mask))); + + // e1 = v2-v1 + simdscalari x1x2Mask = _simd_cmpeq_epi32(vXi[1], vXi[2]); + simdscalari y1y2Mask = _simd_cmpeq_epi32(vYi[1], vYi[2]); + uint32_t e1Mask = _simd_movemask_ps(_simd_castsi_ps(_simd_and_si(x1x2Mask, y1y2Mask))); + + // e2 = v0-v2 + // if v0 == v1 & v1 == v2, v0 == v2 + uint32_t e2Mask = e0Mask & e1Mask; + SWR_ASSERT(KNOB_SIMD_WIDTH == 8, "Need to update degenerate mask code for avx512"); + + // edge order: e0 = v0v1, e1 = v1v2, e2 = v0v2 + // 32 bit binary: 0000 0000 0010 0100 1001 0010 0100 1001 + e0Mask = pdep_u32(e0Mask, 0x00249249); + // 32 bit binary: 0000 0000 0100 1001 0010 0100 1001 0010 + e1Mask = pdep_u32(e1Mask, 0x00492492); + // 32 bit binary: 0000 0000 1001 0010 0100 1001 0010 0100 + e2Mask = pdep_u32(e2Mask, 0x00924924); + + edgeEnable = (0x00FFFFFF & (~(e0Mask | e1Mask | e2Mask))); + } + else + { + edgeEnable = 0x00FFFFFF; + } + } + else + { + // degenerate triangles won't be sent to rasterizer; just enable all edges + pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0), + (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID, + (rastState.scissorEnable > 0)); + } + if (!triMask) { goto endBinTriangles; @@ -1969,6 +1979,16 @@ void BinTriangles( bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right)); bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom)); + if(CT::IsConservativeT::value) + { + // in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has + // some area. Bump the right/bottom edges out + simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.top, bbox.bottom); + bbox.bottom = _simd_blendv_epi32(bbox.bottom, _simd_add_epi32(bbox.bottom, _simd_set1_epi32(1)), topEqualsBottom); + simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.left, bbox.right); + bbox.right = _simd_blendv_epi32(bbox.right, _simd_add_epi32(bbox.right, _simd_set1_epi32(1)), leftEqualsRight); + } + // Cull tris completely outside scissor { simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right); @@ -2026,7 +2046,28 @@ void BinTriangles( BE_WORK work; work.type = DRAW; - work.pfnWork = pfnWork; + + bool isDegenerate; + if(CT::IsConservativeT::value) + { + // only rasterize valid edges if we have a degenerate primitive + int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID; + work.pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0), + (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable, + (rastState.scissorEnable > 0)); + + // Degenerate triangles are required to be constant interpolated + isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false; + } + else + { + isDegenerate = false; + work.pfnWork = pfnWork; + } + + // Select attribute processor + PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3, + state.backendState.swizzleEnable, state.backendState.constantInterpolationMask, isDegenerate); TRIANGLE_WORK_DESC &desc = work.desc.tri; diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp index 1909ddbcd6a..c3ac384e27a 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp @@ -88,7 +88,7 @@ struct EDGE /// @param vA, vB - A & B coefs for each edge of the triangle (Ax + Bx + C) /// @param vStepQuad0-2 - edge equations evaluated at the UL corners of the 2x2 pixel quad. /// Used to step between quads when sweeping over the raster tile. -template<uint32_t NumEdges> +template<uint32_t NumEdges, typename EdgeMaskT> INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdges], EDGE *pRastEdges) { uint64_t coverageMask = 0; @@ -120,25 +120,25 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg // evaluate which pixels in the quad are covered #define EVAL \ - UnrollerL<0, NumEdges, 1>::step(eval_lambda); + UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(eval_lambda); // update coverage mask #define UPDATE_MASK(bit) \ mask = edgeMask[0]; \ - UnrollerL<1, NumEdges, 1>::step(update_lambda); \ + UnrollerLMask<1, NumEdges, 1, EdgeMaskT::value>::step(update_lambda); \ coverageMask |= (mask << bit); // step in the +x direction to the next quad #define INCX \ - UnrollerL<0, NumEdges, 1>::step(incx_lambda); + UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incx_lambda); // step in the +y direction to the next quad #define INCY \ - UnrollerL<0, NumEdges, 1>::step(incy_lambda); + UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incy_lambda); // step in the -x direction to the next quad #define DECX \ - UnrollerL<0, NumEdges, 1>::step(decx_lambda); + UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(decx_lambda); // sweep 2x2 quad back and forth through the raster tile, // computing coverage masks for the entire tile @@ -275,6 +275,17 @@ INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256 } ////////////////////////////////////////////////////////////////////////// +/// @brief calculates difference in precision between the result of manh +/// calculation and the edge precision, based on compile time trait values +template<typename RT> +constexpr int64_t ManhToEdgePrecisionAdjust() +{ + static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value, + "Inadequate precision of result of manh calculation "); + return ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value); +} + +////////////////////////////////////////////////////////////////////////// /// @struct adjustEdgeConservative /// @brief Primary template definition used for partially specializing /// the adjustEdgeConservative function. This struct should never @@ -306,15 +317,15 @@ struct adjustEdgeConservative<RT, std::true_type> /// instead of having to test individual pixel corners for conservative coverage INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge) { - /// Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away - /// from the pixel center (in the direction of the edge normal A/B) + // Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away + // from the pixel center (in the direction of the edge normal A/B) - /// edge = Ax + Bx + C - (manh/e) - /// manh = manhattan distance = abs(A) + abs(B) - /// e = absolute rounding error from snapping from float to fixed point precision + // edge = Ax + Bx + C - (manh/e) + // manh = manhattan distance = abs(A) + abs(B) + // e = absolute rounding error from snapping from float to fixed point precision - /// 'fixed point' multiply (in double to be avx1 friendly) - /// need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example + // 'fixed point' multiply (in double to be avx1 friendly) + // need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example __m256d vAai = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi)), vBai = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi)); __m256d manh = _mm256_add_pd(_mm256_mul_pd(vAai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value)), _mm256_mul_pd(vBai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value))); @@ -322,15 +333,13 @@ struct adjustEdgeConservative<RT, std::true_type> static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value, "Inadequate precision of result of manh calculation "); - /// rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision - /// since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right - manh = _mm256_mul_pd(manh, _mm256_set1_pd(((RT::PrecisionT::BitsT::value + - RT::ConservativePrecisionT::BitsT::value) - - RT::EdgePrecisionT::BitsT::value) * 0.5)); - - /// move the edge away from the pixel center by the required conservative precision + 1/2 pixel - /// this allows the rasterizer to do a single conservative coverage test to see if the primitive - /// intersects the pixel at all + // rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision + // since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right + manh = _mm256_mul_pd(manh, _mm256_set1_pd(ManhToEdgePrecisionAdjust<RT>() * 0.5)); + + // move the edge away from the pixel center by the required conservative precision + 1/2 pixel + // this allows the rasterizer to do a single conservative coverage test to see if the primitive + // intersects the pixel at all vEdge = _mm256_sub_pd(vEdge, manh); }; }; @@ -347,6 +356,19 @@ struct adjustEdgeConservative<RT, std::false_type> }; ////////////////////////////////////////////////////////////////////////// +/// @brief calculates the distance a degenerate BBox needs to be adjusted +/// for conservative rast based on compile time trait values +template<typename RT> +constexpr int64_t ConservativeScissorOffset() +{ + static_assert(RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value >= 0, "Rasterizer precision > conservative precision"); + // if we have a degenerate triangle, we need to compensate for adjusting the degenerate BBox when calculating scissor edges + typedef std::integral_constant<int32_t, (RT::ValidEdgeMaskT::value == ALL_EDGES_VALID) ? 0 : 1> DegenerateEdgeOffsetT; + // 1/2 pixel edge offset + conservative offset - degenerateTriangle + return RT::ConservativeEdgeOffsetT::value - (DegenerateEdgeOffsetT::value << (RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value)); +} + +////////////////////////////////////////////////////////////////////////// /// @brief Performs calculations to adjust each a scalar edge out /// from the pixel center by 1/2 pixel + uncertainty region in both the x and y /// direction. @@ -354,13 +376,7 @@ template <typename RT> INLINE void adjustScissorEdge(const double a, const double b, __m256d &vEdge) { int64_t aabs = std::abs(static_cast<int64_t>(a)), babs = std::abs(static_cast<int64_t>(b)); - - int64_t manh = ((aabs * RT::ConservativeEdgeOffsetT::value) + (babs * RT::ConservativeEdgeOffsetT::value)) >> - ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value); - - static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value, - "Inadequate precision of result of manh calculation "); - + int64_t manh = ((aabs * ConservativeScissorOffset<RT>()) + (babs * ConservativeScissorOffset<RT>())) >> ManhToEdgePrecisionAdjust<RT>(); vEdge = _mm256_sub_pd(vEdge, _mm256_set1_pd(manh)); }; @@ -371,7 +387,7 @@ INLINE void adjustEdgesFix16(const __m128i &vAi, const __m128i &vBi, __m256d &vE { static_assert(std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value, "Edge equation expected to be in x.16 fixed point"); - /// need to offset the edge before applying the top-left rule + // need to offset the edge before applying the top-left rule adjustEdgeConservative<RT, typename RT::IsConservativeT>(vAi, vBi, vEdge); adjustTopLeftRuleIntFix16(vAi, vBi, vEdge); @@ -563,14 +579,13 @@ struct ComputeScissorEdges template <typename RT> struct ComputeScissorEdges<std::true_type, std::true_type, RT> { - ////////////////////////////////////////////////////////////////////////// /// @brief Intersect tri bbox with scissor, compute scissor edge vectors, /// evaluate edge equations and offset them away from pixel center. INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y, EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]) { - /// if conservative rasterizing, triangle bbox intersected with scissor bbox is used + // if conservative rasterizing, triangle bbox intersected with scissor bbox is used BBOX scissor; scissor.left = std::max(triBBox.left, scissorBBox.left); scissor.right = std::min(triBBox.right, scissorBBox.right); @@ -593,7 +608,7 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT> vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom))); vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top))); - /// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing + // if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]); adjustScissorEdge<RT>(rastEdges[4].a, rastEdges[4].b, vEdgeFix16[4]); adjustScissorEdge<RT>(rastEdges[5].a, rastEdges[5].b, vEdgeFix16[5]); @@ -632,6 +647,81 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT> } }; +////////////////////////////////////////////////////////////////////////// +/// @brief Primary function template for TrivialRejectTest. Should +/// never be called, but TemplateUnroller instantiates a few unused values, +/// so it calls a runtime assert instead of a static_assert. +template <typename ValidEdgeMaskT> +INLINE bool TrivialRejectTest(const int, const int, const int) +{ + SWR_ASSERT(0, "Primary templated function should never be called"); + return false; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief E0E1ValidT specialization of TrivialRejectTest. Tests edge 0 +/// and edge 1 for trivial coverage reject +template <> +INLINE bool TrivialRejectTest<E0E1ValidT>(const int mask0, const int mask1, const int) +{ + return (!(mask0 && mask1)) ? true : false; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief E0E2ValidT specialization of TrivialRejectTest. Tests edge 0 +/// and edge 2 for trivial coverage reject +template <> +INLINE bool TrivialRejectTest<E0E2ValidT>(const int mask0, const int, const int mask2) +{ + return (!(mask0 && mask2)) ? true : false; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief E1E2ValidT specialization of TrivialRejectTest. Tests edge 1 +/// and edge 2 for trivial coverage reject +template <> +INLINE bool TrivialRejectTest<E1E2ValidT>(const int, const int mask1, const int mask2) +{ + return (!(mask1 && mask2)) ? true : false; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief AllEdgesValidT specialization of TrivialRejectTest. Tests all +/// primitive edges for trivial coverage reject +template <> +INLINE bool TrivialRejectTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2) +{ + return (!(mask0 && mask1 && mask2)) ? true : false;; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief NoEdgesValidT specialization of TrivialRejectTest. Degenerate +/// point, so return false and rasterize against conservative BBox +template <> +INLINE bool TrivialRejectTest<NoEdgesValidT>(const int, const int, const int) +{ + return false; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief Primary function template for TrivialAcceptTest. Always returns +/// false, since it will only be called for degenerate tris, and as such +/// will never cover the entire raster tile +template <typename ValidEdgeMaskT> +INLINE bool TrivialAcceptTest(const int, const int, const int) +{ + return false; +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief AllEdgesValidT specialization for TrivialAcceptTest. Test all +/// edge masks for a fully covered raster tile +template <> +INLINE bool TrivialAcceptTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2) +{ + return ((mask0 & mask1 & mask2) == 0xf); +}; + template <typename RT> void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc) { @@ -681,8 +771,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, // determinant float det = calcDeterminantInt(vAi, vBi); - /// Verts in Pixel Coordinate Space at this point - /// Det > 0 = CW winding order + // Verts in Pixel Coordinate Space at this point + // Det > 0 = CW winding order // Convert CW triangles to CCW if (det > 0.0) { @@ -693,28 +783,39 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, det = -det; } - /// @todo: handle degenerates for ConservativeRast - __m128 vC; // Finish triangle setup - C edge coef triangleSetupC(vX, vY, vA, vB, vC); - // compute barycentric i and j - // i = (A1x + B1y + C1)/det - // j = (A2x + B2y + C2)/det - __m128 vDet = _mm_set1_ps(det); - __m128 vRecipDet = _mm_div_ps(_mm_set1_ps(1.0f), vDet);//_mm_rcp_ps(vDet); - _mm_store_ss(&triDesc.recipDet, vRecipDet); - - // only extract coefs for 2 of the barycentrics; the 3rd can be - // determined from the barycentric equation: - // i + j + k = 1 <=> k = 1 - j - i - _MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1); - _MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1); - _MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1); - _MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2); - _MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2); - _MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2); + if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID) + { + // If we have degenerate edge(s) to rasterize, set I and J coefs + // to 0 for constant interpolation of attributes + triDesc.I[0] = 0.0f; + triDesc.I[1] = 0.0f; + triDesc.I[2] = 0.0f; + triDesc.J[0] = 0.0f; + triDesc.J[1] = 0.0f; + triDesc.J[2] = 0.0f; + + // Degenerate triangles have no area + triDesc.recipDet = 0.0f; + } + else + { + // only extract coefs for 2 of the barycentrics; the 3rd can be + // determined from the barycentric equation: + // i + j + k = 1 <=> k = 1 - j - i + _MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1); + _MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1); + _MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1); + _MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2); + _MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2); + _MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2); + + // compute recipDet, used to calculate barycentric i and j in the backend + triDesc.recipDet = 1.0f/det; + } OSALIGNSIMD(float) oneOverW[4]; _mm_store_ps(oneOverW, vRecipW); @@ -764,6 +865,14 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, OSALIGNSIMD(BBOX) bbox; calcBoundingBoxInt(vXi, vYi, bbox); + if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID) + { + // If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid + bbox.left--; bbox.right++; bbox.top--; bbox.bottom++; + SWR_ASSERT(state.scissorInFixedPoint.left >= 0 && state.scissorInFixedPoint.top >= 0, + "Conservative rast degenerate handling requires a valid scissor rect"); + } + // Intersect with scissor/viewport OSALIGNSIMD(BBOX) intersect; intersect.left = std::max(bbox.left, state.scissorInFixedPoint.left); @@ -941,13 +1050,13 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, for (uint32_t sampleNum = 0; sampleNum < NumRasterSamplesT::value; sampleNum++) { // trivial reject, at least one edge has all 4 corners of raster tile outside - bool trivialReject = (!(mask0 && mask1 && mask2)) ? true : false; + bool trivialReject = TrivialRejectTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2); if (!trivialReject) { // trivial accept mask triDesc.coverageMask[sampleNum] = 0xffffffffffffffffULL; - if ((mask0 & mask1 & mask2) == 0xf) + if (TrivialAcceptTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2)) { triDesc.anyCoveredSamples = triDesc.coverageMask[sampleNum]; // trivial accept, all 4 corners of all 3 edges are negative @@ -991,7 +1100,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, // not trivial accept or reject, must rasterize full tile RDTSC_START(BERasterizePartial); - triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value>(pDC, startQuadEdges, rastEdges); + triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges); RDTSC_STOP(BERasterizePartial, 0, 0); triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum]; @@ -1101,7 +1210,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, // once at center and broadcast the results in the backend uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X; // conservative rast not supported for points/lines - pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0)); + pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (rastState.scissorEnable > 0)); // overwrite texcoords for point sprites if (isPointSpriteTexCoordEnabled) @@ -1429,7 +1538,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi PFN_WORK_FUNC pfnTriRast; uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X; // conservative rast not supported for points/lines - pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0)); + pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (rastState.scissorEnable > 0)); // make sure this macrotile intersects the triangle __m128i vXai = fpToFixedPoint(vXa); @@ -1541,6 +1650,7 @@ PFN_WORK_FUNC GetRasterizerFunc( uint32_t numSamples, bool IsConservative, uint32_t InputCoverage, + uint32_t EdgeEnable, bool RasterizeScissorEdges ) { @@ -1548,5 +1658,6 @@ PFN_WORK_FUNC GetRasterizerFunc( IntArg<SWR_MULTISAMPLE_1X,SWR_MULTISAMPLE_TYPE_COUNT-1>{numSamples}, IsConservative, IntArg<SWR_INPUT_COVERAGE_NONE, SWR_INPUT_COVERAGE_COUNT-1>{InputCoverage}, + IntArg<0, VALID_TRI_EDGE_COUNT-1>{EdgeEnable}, RasterizeScissorEdges); } diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h index 1b5cac26be5..457153b7fbd 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h @@ -48,8 +48,28 @@ PFN_WORK_FUNC GetRasterizerFunc( uint32_t numSamples, bool IsConservative, uint32_t InputCoverage, + uint32_t EdgeEnable, bool RasterizeScissorEdges); +enum ValidTriEdges +{ + NO_VALID_EDGES = 0, + E0_E1_VALID = 0x3, + E0_E2_VALID = 0x5, + E1_E2_VALID = 0x6, + ALL_EDGES_VALID = 0x7, + VALID_TRI_EDGE_COUNT, +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief ValidTriEdges convenience typedefs used for templated function +/// specialization supported Fixed Point precisions +typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> AllEdgesValidT; +typedef std::integral_constant<uint32_t, E0_E1_VALID> E0E1ValidT; +typedef std::integral_constant<uint32_t, E0_E2_VALID> E0E2ValidT; +typedef std::integral_constant<uint32_t, E1_E2_VALID> E1E2ValidT; +typedef std::integral_constant<uint32_t, NO_VALID_EDGES> NoEdgesValidT; + ////////////////////////////////////////////////////////////////////////// /// @struct RasterScissorEdgesT /// @brief Primary RasterScissorEdgesT templated struct that holds compile @@ -59,22 +79,26 @@ PFN_WORK_FUNC GetRasterizerFunc( /// 3 triangle edges + 4 scissor edges for coverage. /// @tparam RasterScissorEdgesT: number of multisamples /// @tparam ConservativeT: is this a conservative rasterization -template <typename RasterScissorEdgesT, typename ConservativeT> +/// @tparam EdgeMaskT: Which edges are valid(not degenerate) +template <typename RasterScissorEdgesT, typename ConservativeT, typename EdgeMaskT> struct RasterEdgeTraits { typedef std::true_type RasterizeScissorEdgesT; typedef std::integral_constant<uint32_t, 7> NumEdgesT; + typedef std::integral_constant<uint32_t, EdgeMaskT::value> ValidEdgeMaskT; }; ////////////////////////////////////////////////////////////////////////// /// @brief specialization of RasterEdgeTraits. If neither scissor rect /// nor conservative rast is enabled, only test 3 triangle edges /// for coverage -template <> -struct RasterEdgeTraits<std::false_type, std::false_type> +template <typename EdgeMaskT> +struct RasterEdgeTraits<std::false_type, std::false_type, EdgeMaskT> { typedef std::false_type RasterizeScissorEdgesT; typedef std::integral_constant<uint32_t, 3> NumEdgesT; + // no need for degenerate edge masking in non-conservative case; rasterize all triangle edges + typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> ValidEdgeMaskT; }; ////////////////////////////////////////////////////////////////////////// @@ -86,19 +110,19 @@ struct RasterEdgeTraits<std::false_type, std::false_type> /// @tparam InputCoverageT: what type of input coverage is the PS expecting? /// (only used with conservative rasterization) /// @tparam RasterScissorEdgesT: do we need to rasterize with a scissor? -template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename RasterScissorEdgesT> +template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename EdgeEnableT, typename RasterScissorEdgesT> struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>, - public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT> + public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, std::integral_constant<uint32_t, EdgeEnableT::value>> { typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value)> MT; - + /// Fixed point precision the rasterizer is using typedef FixedPointTraits<Fixed_16_8> PrecisionT; /// Fixed point precision of the edge tests used during rasterization typedef FixedPointTraits<Fixed_X_16> EdgePrecisionT; // If conservative rast is enabled, only need a single sample coverage test, with the result copied to all samples - typedef std::integral_constant<int, (ConservativeT::value) ? 1 : MT::numSamples> NumRasterSamplesT; + typedef std::integral_constant<int, (ConservativeT::value) ? 1 : MT::numSamples> NumRasterSamplesT; static_assert(EdgePrecisionT::BitsT::value >= ConservativeRastBETraits<ConservativeT, InputCoverageT>::ConservativePrecisionT::BitsT::value, "Rasterizer edge fixed point precision < required conservative rast precision"); diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h index 0cfdb84c73b..79f45ebf25d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/utils.h +++ b/src/gallium/drivers/swr/rasterizer/core/utils.h @@ -831,6 +831,26 @@ struct UnrollerL<End, End, Step> { } }; +// helper function to unroll loops, with mask to skip specific iterations +template<int Begin, int End, int Step = 1, int Mask = 0x7f> +struct UnrollerLMask { + template<typename Lambda> + INLINE static void step(Lambda& func) { + if(Mask & (1 << Begin)) + { + func(Begin); + } + UnrollerL<Begin + Step, End, Step>::step(func); + } +}; + +template<int End, int Step, int Mask> +struct UnrollerLMask<End, End, Step, Mask> { + template<typename Lambda> + static void step(Lambda& func) { + } +}; + // general CRC compute INLINE uint32_t ComputeCRC(uint32_t crc, const void *pData, uint32_t size) |