diff options
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.cpp | 4 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/backend.cpp | 334 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/backend.h | 50 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/multisample.cpp | 13 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/multisample.h | 470 |
5 files changed, 633 insertions, 238 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index e950e92c874..3b02d197111 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -763,7 +763,6 @@ extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_IN extern PFN_OUTPUT_MERGER gBackendOutputMergerTable[SWR_NUM_RENDERTARGETS + 1][SWR_MULTISAMPLE_TYPE_MAX]; extern PFN_CALC_PIXEL_BARYCENTRICS gPixelBarycentricTable[2]; extern PFN_CALC_SAMPLE_BARYCENTRICS gSampleBarycentricTable[2]; -extern PFN_CALC_CENTROID_BARYCENTRICS gCentroidBarycentricTable[SWR_MULTISAMPLE_TYPE_MAX][2][2][2]; void SetupPipeline(DRAW_CONTEXT *pDC) { DRAW_STATE* pState = pDC->pState; @@ -827,9 +826,6 @@ void SetupPipeline(DRAW_CONTEXT *pDC) bBarycentrics = (barycentricsMask & SWR_BARYCENTRIC_PER_SAMPLE_MASK) > 0 ? 1 : 0; backendFuncs.pfnCalcSampleBarycentrics = gSampleBarycentricTable[bBarycentrics]; - - bBarycentrics = (barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0 ? 1 : 0; - backendFuncs.pfnCalcCentroidBarycentrics = gCentroidBarycentricTable[rastState.sampleCount][bBarycentrics][rastState.samplePattern][forcedSampleCount]; } PFN_PROCESS_PRIMS pfnBinner; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index a2212ba8aa4..310a7edcde1 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -459,10 +459,10 @@ simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscala return _simd_movemask_ps(vClipMask); } -template<bool perspMask> +template<bool bGenerateBarycentrics> INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext) { - if(perspMask) + if(bGenerateBarycentrics) { // evaluate I,J psContext.vI.center = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.center, psContext.vY.center); @@ -475,10 +475,10 @@ INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEX } } -template<bool perspMask> +template<bool bGenerateBarycentrics> INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext) { - if(perspMask) + if(bGenerateBarycentrics) { // evaluate I,J psContext.vI.sample = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.sample, psContext.vY.sample); @@ -502,13 +502,12 @@ INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTE // evaluated as follows : If the SampleMask Rasterizer state is a subset of the samples in the pixel, then the first sample covered by the // SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point. //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -template<SWR_MULTISAMPLE_COUNT sampleCount, bool bForcedSampleCount> +template<typename T> INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const coverageMask, const uint32_t sampleMask, const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL) { uint32_t inputMask[KNOB_SIMD_WIDTH]; - - generateInputCoverage<sampleCount, 1, bForcedSampleCount>(coverageMask, inputMask, sampleMask); + generateInputCoverage<T>(coverageMask, inputMask, sampleMask); // Case (2) - partially covered pixel @@ -524,29 +523,29 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov (inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0); // look up and set the sample offsets from UL pixel corner for first covered sample - __m256 vXSample = _mm256_set_ps(MultisampleTraits<sampleCount>::X(sampleNum[7]), - MultisampleTraits<sampleCount>::X(sampleNum[6]), - MultisampleTraits<sampleCount>::X(sampleNum[5]), - MultisampleTraits<sampleCount>::X(sampleNum[4]), - MultisampleTraits<sampleCount>::X(sampleNum[3]), - MultisampleTraits<sampleCount>::X(sampleNum[2]), - MultisampleTraits<sampleCount>::X(sampleNum[1]), - MultisampleTraits<sampleCount>::X(sampleNum[0])); - - __m256 vYSample = _mm256_set_ps(MultisampleTraits<sampleCount>::Y(sampleNum[7]), - MultisampleTraits<sampleCount>::Y(sampleNum[6]), - MultisampleTraits<sampleCount>::Y(sampleNum[5]), - MultisampleTraits<sampleCount>::Y(sampleNum[4]), - MultisampleTraits<sampleCount>::Y(sampleNum[3]), - MultisampleTraits<sampleCount>::Y(sampleNum[2]), - MultisampleTraits<sampleCount>::Y(sampleNum[1]), - MultisampleTraits<sampleCount>::Y(sampleNum[0])); + __m256 vXSample = _mm256_set_ps(T::MultisampleT::X(sampleNum[7]), + T::MultisampleT::X(sampleNum[6]), + T::MultisampleT::X(sampleNum[5]), + T::MultisampleT::X(sampleNum[4]), + T::MultisampleT::X(sampleNum[3]), + T::MultisampleT::X(sampleNum[2]), + T::MultisampleT::X(sampleNum[1]), + T::MultisampleT::X(sampleNum[0])); + + __m256 vYSample = _mm256_set_ps(T::MultisampleT::Y(sampleNum[7]), + T::MultisampleT::Y(sampleNum[6]), + T::MultisampleT::Y(sampleNum[5]), + T::MultisampleT::Y(sampleNum[4]), + T::MultisampleT::Y(sampleNum[3]), + T::MultisampleT::Y(sampleNum[2]), + T::MultisampleT::Y(sampleNum[1]), + T::MultisampleT::Y(sampleNum[0])); // add sample offset to UL pixel corner vXSample = _simd_add_ps(vXSamplePosUL, vXSample); vYSample = _simd_add_ps(vYSamplePosUL, vYSample); // Case (1) and case (3b) - All samples covered or not covered with full SampleMask - static const __m256i vFullyCoveredMask = MultisampleTraits<sampleCount>::FullSampleMask(); + static const __m256i vFullyCoveredMask = T::MultisampleT::FullSampleMask(); __m256i vInputCoveragei = _mm256_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0]); __m256i vAllSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vFullyCoveredMask); @@ -570,46 +569,38 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov __m256i vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples); - vXSample = _simd_set1_ps(MultisampleTraits<sampleCount>::X(firstCoveredSampleMaskSample)); - vYSample = _simd_set1_ps(MultisampleTraits<sampleCount>::Y(firstCoveredSampleMaskSample)); + vXSample = _simd_set1_ps(T::MultisampleT::X(firstCoveredSampleMaskSample)); + vYSample = _simd_set1_ps(T::MultisampleT::Y(firstCoveredSampleMaskSample)); // blend in case 3a pixel locations psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a)); psContext.vY.centroid = _simd_blendv_ps(psContext.vY.centroid, vYSample, _simd_castsi_ps(vCase3a)); } -template<uint32_t sampleCount, uint32_t persp, uint32_t standardPattern, uint32_t forcedMultisampleCount> +template<typename T> INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext, const uint64_t *const coverageMask, const uint32_t sampleMask, const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL) { - static const bool bPersp = (bool)persp; - static const bool bIsStandardPattern = (bool)standardPattern; - static const bool bForcedMultisampleCount = (bool)forcedMultisampleCount; - - // calculate centroid positions - if(bPersp) + if(T::bIsStandardPattern) { - if(bIsStandardPattern) - { - ///@ todo: don't need to generate input coverage 2x if input coverage and centroid - CalcCentroidPos<(SWR_MULTISAMPLE_COUNT)sampleCount, bForcedMultisampleCount>(psContext, coverageMask, sampleMask, vXSamplePosUL, vYSamplePosUL); - } - else - { - static const __m256 pixelCenter = _simd_set1_ps(0.5f); - psContext.vX.centroid = _simd_add_ps(vXSamplePosUL, pixelCenter); - psContext.vY.centroid = _simd_add_ps(vYSamplePosUL, pixelCenter); - } - // evaluate I,J - psContext.vI.centroid = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid); - psContext.vJ.centroid = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid); - psContext.vI.centroid = _simd_mul_ps(psContext.vI.centroid, coeffs.vRecipDet); - psContext.vJ.centroid = _simd_mul_ps(psContext.vJ.centroid, coeffs.vRecipDet); - - // interpolate 1/w - psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid); + ///@ todo: don't need to generate input coverage 2x if input coverage and centroid + CalcCentroidPos<T>(psContext, coverageMask, sampleMask, vXSamplePosUL, vYSamplePosUL); } + else + { + static const __m256 pixelCenter = _simd_set1_ps(0.5f); + psContext.vX.centroid = _simd_add_ps(vXSamplePosUL, pixelCenter); + psContext.vY.centroid = _simd_add_ps(vYSamplePosUL, pixelCenter); + } + // evaluate I,J + psContext.vI.centroid = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid); + psContext.vJ.centroid = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid); + psContext.vI.centroid = _simd_mul_ps(psContext.vI.centroid, coeffs.vRecipDet); + psContext.vJ.centroid = _simd_mul_ps(psContext.vJ.centroid, coeffs.vRecipDet); + + // interpolate 1/w + psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid); } template<uint32_t NumRT, uint32_t sampleCountT> @@ -680,13 +671,10 @@ void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_REND } } -template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount> +template<typename T> void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers) { RDTSC_START(BESetup); - // type safety guaranteed from template instantiation in BEChooser<>::GetFunc - static const bool bInputCoverage = (bool)inputCoverage; - static const bool bCentroidPos = (bool)centroidPos; SWR_CONTEXT *pContext = pDC->pContext; const API_STATE& state = GetApiState(pDC); @@ -736,8 +724,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 psContext.J = work.J; psContext.recipDet = work.recipDet; psContext.pRecipW = work.pRecipW; - psContext.pSamplePosX = (const float*)&MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosX; - psContext.pSamplePosY = (const float*)&MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosY; + psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX; + psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY; for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM) { @@ -748,9 +736,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM) { - if(bInputCoverage) + if(T::bInputCoverage) { - generateInputCoverage<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, false>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask); + generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask); } if(coverageMask & MASK) @@ -762,7 +750,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext); - if(bCentroidPos) + if(T::bCentroidPos) { // for 1x case, centroid is pixel center psContext.vX.centroid = psContext.vX.center; @@ -873,14 +861,9 @@ Endtile: } } -template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount> +template<typename T> void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers) { - // type safety guaranteed from template instantiation in BEChooser<>::GetFunc - static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT; - static const bool bInputCoverage = (bool)inputCoverage; - static const bool bCentroidPos = (bool)centroidPos; - RDTSC_START(BESetup); SWR_CONTEXT *pContext = pDC->pContext; @@ -930,9 +913,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ psContext.I = work.I; psContext.J = work.J; psContext.recipDet = work.recipDet; - psContext.pSamplePosX = (const float*)&MultisampleTraits<sampleCount>::samplePosX; - psContext.pSamplePosY = (const float*)&MultisampleTraits<sampleCount>::samplePosY; - const uint32_t numSamples = MultisampleTraits<sampleCount>::numSamples; + psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX; + psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY; + const uint32_t numSamples = T::MultisampleT::numSamples; for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM) { @@ -951,16 +934,16 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext); RDTSC_STOP(BEBarycentric, 0, 0); - if(bInputCoverage) + if(T::bInputCoverage) { - generateInputCoverage<sampleCount, SWR_MSAA_STANDARD_PATTERN, false>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask); + generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask); } - if(bCentroidPos) + if(T::bCentroidPos) { ///@ todo: don't need to genererate input coverage 2x if input coverage and centroid RDTSC_START(BEBarycentric); - backendFuncs.pfnCalcCentroidBarycentrics(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL); + CalcCentroidBarycentrics<T>(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL); RDTSC_STOP(BEBarycentric, 0, 0); } @@ -971,8 +954,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ RDTSC_START(BEBarycentric); // calculate per sample positions - psContext.vX.sample = _simd_add_ps(psContext.vX.UL, MultisampleTraits<sampleCount>::vX(sample)); - psContext.vY.sample = _simd_add_ps(psContext.vY.UL, MultisampleTraits<sampleCount>::vY(sample)); + psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample)); + psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample)); simdmask coverageMask = work.coverageMask[sample] & MASK; simdscalar vCoverageMask = vMask(coverageMask); @@ -996,8 +979,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ simdscalar stencilPassMask = vCoverageMask; // offset depth/stencil buffers current sample - uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample); - uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample); + uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample); + uint8_t *pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample); // Early-Z? if (CanEarlyZ(pPSState)) @@ -1032,7 +1015,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ vCoverageMask = _simd_castsi_ps(psContext.activeMask); - //// late-Z + // late-Z if (!CanEarlyZ(pPSState)) { RDTSC_START(BELateDepthTest); @@ -1083,16 +1066,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ } } -template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount> +template<typename T> void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers) { - // type safety guaranteed from template instantiation in BEChooser<>::GetFunc - static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT; - static const bool bIsStandardPattern = (bool)samplePattern; - static const bool bInputCoverage = (bool)inputCoverage; - static const bool bCentroidPos = (bool)centroidPos; - static const bool bForcedSampleCount = (bool)forcedSampleCount; - RDTSC_START(BESetup); SWR_CONTEXT *pContext = pDC->pContext; @@ -1141,35 +1117,25 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t psContext.I = work.I; psContext.J = work.J; psContext.recipDet = work.recipDet; - psContext.pSamplePosX = (const float*)&MultisampleTraits<sampleCount>::samplePosX; - psContext.pSamplePosY = (const float*)&MultisampleTraits<sampleCount>::samplePosY; + psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX; + psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY; psContext.sampleIndex = 0; - uint32_t numCoverageSamples; - if(bIsStandardPattern) - { - numCoverageSamples = MultisampleTraits<sampleCount>::numSamples; - } - else - { - numCoverageSamples = 1; - } - uint32_t numOMSamples; // RT has to be single sample if we're in forcedMSAA mode - if(bForcedSampleCount && (sampleCount > SWR_MULTISAMPLE_1X)) + if(T::bForcedSampleCount && (T::MultisampleT::sampleCount > SWR_MULTISAMPLE_1X)) { numOMSamples = 1; } // unless we're forced to single sample, in which case we run the OM at the sample count of the RT - else if(bForcedSampleCount && (sampleCount == SWR_MULTISAMPLE_1X)) + else if(T::bForcedSampleCount && (T::MultisampleT::sampleCount == SWR_MULTISAMPLE_1X)) { numOMSamples = GetNumSamples(pBlendState->sampleCount); } // else we're in normal MSAA mode and rasterizer and OM are running at the same sample count else { - numOMSamples = MultisampleTraits<sampleCount>::numSamples; + numOMSamples = T::MultisampleT::numSamples; } for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM) @@ -1178,21 +1144,21 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps((float)yy)); for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM) { - simdscalar vZ[MultisampleTraits<sampleCount>::numSamples]{ 0 }; + simdscalar vZ[T::MultisampleT::numSamples]{ 0 }; psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps((float)xx)); // set pixel center positions psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx)); - if (bInputCoverage) + if (T::bInputCoverage) { - generateInputCoverage<sampleCount, bIsStandardPattern, bForcedSampleCount>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask); + generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask); } - if(bCentroidPos) + if(T::bCentroidPos) { ///@ todo: don't need to genererate input coverage 2x if input coverage and centroid RDTSC_START(BEBarycentric); - backendFuncs.pfnCalcCentroidBarycentrics(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL); + CalcCentroidBarycentrics<T>(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL); RDTSC_STOP(BEBarycentric, 0, 0); } @@ -1219,12 +1185,12 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t } // need to declare enough space for all samples - simdscalar vCoverageMask[MultisampleTraits<sampleCount>::numSamples]; - simdscalar depthPassMask[MultisampleTraits<sampleCount>::numSamples]; - simdscalar stencilPassMask[MultisampleTraits<sampleCount>::numSamples]; + simdscalar vCoverageMask[T::MultisampleT::numSamples]; + simdscalar depthPassMask[T::MultisampleT::numSamples]; + simdscalar stencilPassMask[T::MultisampleT::numSamples]; simdscalar anyDepthSamplePassed = _simd_setzero_ps(); simdscalar anyStencilSamplePassed = _simd_setzero_ps(); - for(uint32_t sample = 0; sample < numCoverageSamples; sample++) + for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++) { vCoverageMask[sample] = vMask(work.coverageMask[sample] & MASK); @@ -1237,7 +1203,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t continue; } - if(bForcedSampleCount) + if(T::bForcedSampleCount) { // candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(pBlendState->sampleMask), _simd_setzero_si())); @@ -1252,11 +1218,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t if(!pPSState->writesODepth || rastState.clipDistanceMask) { RDTSC_START(BEBarycentric); - if(bIsStandardPattern) + if(T::bIsStandardPattern) { // calculate per sample positions - psContext.vX.sample = _simd_add_ps(psContext.vX.UL, MultisampleTraits<sampleCount>::vX(sample)); - psContext.vY.sample = _simd_add_ps(psContext.vY.UL, MultisampleTraits<sampleCount>::vY(sample)); + psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample)); + psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample)); } else { @@ -1291,8 +1257,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t } // offset depth/stencil buffers current sample - uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample); - uint8_t * pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample); + uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample); + uint8_t * pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample); // ZTest for this sample RDTSC_START(BEEarlyDepthTest); @@ -1332,8 +1298,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // loop over all samples, broadcasting the results of the PS to all passing pixels for(uint32_t sample = 0; sample < numOMSamples; sample++) { - uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample); - uint8_t * pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample); + uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample); + uint8_t * pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample); // output merger RDTSC_START(BEOutputMerger); @@ -1346,12 +1312,12 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // forcedSampleCount outputs to any pixels with covered samples not masked off by SampleMask // depth test is disabled, so just set the z val to 0. - if(bForcedSampleCount) + if(T::bForcedSampleCount) { coverageMaskSample = depthMaskSample = anyDepthSamplePassed; vInterpolatedZ = _simd_setzero_ps(); } - else if(bIsStandardPattern) + else if(T::bIsStandardPattern) { if(!_simd_movemask_ps(depthPassMask[sample])) { @@ -1393,7 +1359,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t Endtile: RDTSC_START(BEEndTile); - for(uint32_t sample = 0; sample < numCoverageSamples; sample++) + for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++) { work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); } @@ -1413,10 +1379,10 @@ Endtile: template<uint32_t sampleCountT> void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers) { + ///@todo: handle center multisample pattern + typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T; RDTSC_START(BESetup); - static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT; - SWR_CONTEXT *pContext = pDC->pContext; const API_STATE& state = GetApiState(pDC); const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs; @@ -1464,8 +1430,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, { RDTSC_START(BEBarycentric); // calculate per sample positions - psContext.vX.sample = _simd_add_ps(vXSamplePosUL, MultisampleTraits<sampleCount>::vX(sample)); - psContext.vY.sample = _simd_add_ps(vYSamplePosUL, MultisampleTraits<sampleCount>::vY(sample)); + psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample)); + psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample)); backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext); @@ -1486,8 +1452,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, simdscalar stencilPassMask = vCoverageMask; // offset depth/stencil buffers current sample - uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample); - uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample); + uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample); + uint8_t *pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample); RDTSC_START(BEEarlyDepthTest); simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, @@ -1526,7 +1492,6 @@ PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COV PFN_OUTPUT_MERGER gBackendOutputMergerTable[SWR_NUM_RENDERTARGETS+1][SWR_MULTISAMPLE_TYPE_MAX] = {}; PFN_CALC_PIXEL_BARYCENTRICS gPixelBarycentricTable[2] = {}; PFN_CALC_SAMPLE_BARYCENTRICS gSampleBarycentricTable[2] = {}; -PFN_CALC_CENTROID_BARYCENTRICS gCentroidBarycentricTable[SWR_MULTISAMPLE_TYPE_MAX][2][2][2] = {}; // Recursive template used to auto-nest conditionals. Converts dynamic enum function // arguments to static template arguments. @@ -1576,34 +1541,18 @@ struct OMChooser // Recursive template used to auto-nest conditionals. Converts dynamic enum function // arguments to static template arguments. template <uint32_t... ArgsT> -struct BECentroidBarycentricChooser +struct BEChooser { - // Last Arg Terminator - template <typename... TArgsT> - static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(uint32_t tArg) - { - if(tArg > 0) - { - return CalcCentroidBarycentrics<ArgsT..., 1>; - } - - return CalcCentroidBarycentrics<ArgsT..., 0>; - } - - // Recursively parse args - template <typename... TArgsT> - static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs) + static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg) { switch(tArg) { - case SWR_MULTISAMPLE_1X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break; - case SWR_MULTISAMPLE_2X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break; - case SWR_MULTISAMPLE_4X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break; - case SWR_MULTISAMPLE_8X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break; - case SWR_MULTISAMPLE_16X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break; + case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<SwrBackendTraits<ArgsT...>>; break; + case SWR_BACKEND_MSAA_PIXEL_RATE: return BackendPixelRate<SwrBackendTraits<ArgsT...>>; break; + case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<SwrBackendTraits<ArgsT...>>; break; default: - SWR_ASSERT(0 && "Invalid sample count\n"); + SWR_ASSERT(0 && "Invalid backend func\n"); return nullptr; break; } @@ -1611,38 +1560,19 @@ struct BECentroidBarycentricChooser // Recursively parse args template <typename... TArgsT> - static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(uint32_t tArg, TArgsT... remainingArgs) - { - if(tArg > 0) - { - return BECentroidBarycentricChooser<ArgsT..., 1>::GetFunc(remainingArgs...); - } - - return BECentroidBarycentricChooser<ArgsT..., 0>::GetFunc(remainingArgs...); - } -}; - -// Recursive template used to auto-nest conditionals. Converts dynamic enum function -// arguments to static template arguments. -template <uint32_t... ArgsT> -struct BEChooser -{ - // Last Arg Terminator - static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg) + static PFN_BACKEND_FUNC GetFunc(SWR_MSAA_SAMPLE_PATTERN tArg, TArgsT... remainingArgs) { switch(tArg) { - case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<ArgsT...>; break; - case SWR_BACKEND_MSAA_PIXEL_RATE: return BackendPixelRate<ArgsT...>; break; - case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<ArgsT...>; break; + case SWR_MSAA_CENTER_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_CENTER_PATTERN>::GetFunc(remainingArgs...); break; + case SWR_MSAA_STANDARD_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...); break; default: - SWR_ASSERT(0 && "Invalid backend func\n"); - return nullptr; - break; + SWR_ASSERT(0 && "Invalid sample pattern\n"); + return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...); + break; } } - // Recursively parse args template <typename... TArgsT> static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs) @@ -1655,22 +1585,22 @@ struct BEChooser case SWR_MULTISAMPLE_8X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break; case SWR_MULTISAMPLE_16X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break; default: - SWR_ASSERT(0 && "Invalid sample count\n"); - return nullptr; - break; + SWR_ASSERT(0 && "Invalid sample count\n"); + return BEChooser<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); + break; } } // Recursively parse args template <typename... TArgsT> - static PFN_BACKEND_FUNC GetFunc(uint32_t tArg, TArgsT... remainingArgs) + static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs) { - if(tArg > 0) + if(tArg == true) { return BEChooser<ArgsT..., 1>::GetFunc(remainingArgs...); } - return BEChooser<ArgsT..., 0>::GetFunc(remainingArgs...); + return BEChooser<ArgsT..., 0>::GetFunc(remainingArgs...); } }; @@ -1689,37 +1619,21 @@ void InitBackendOMFuncTable(PFN_OUTPUT_MERGER (&table)[numRenderTargets][numSamp template <SWR_MULTISAMPLE_COUNT numSampleRates> void InitBackendBarycentricsTables(PFN_CALC_PIXEL_BARYCENTRICS (&pixelTable)[2], - PFN_CALC_SAMPLE_BARYCENTRICS (&sampleTable)[2], - PFN_CALC_CENTROID_BARYCENTRICS (¢roidTable)[numSampleRates][2][2][2]) + PFN_CALC_SAMPLE_BARYCENTRICS (&sampleTable)[2]) { pixelTable[0] = CalcPixelBarycentrics<0>; pixelTable[1] = CalcPixelBarycentrics<1>; sampleTable[0] = CalcSampleBarycentrics<0>; sampleTable[1] = CalcSampleBarycentrics<1>; - - for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < numSampleRates; sampleCount++) - { - for(uint32_t baryMask = 0; baryMask < 2; baryMask++) - { - for(uint32_t patternNum = 0; patternNum < 2; patternNum++) - { - for(uint32_t forcedSampleEnable = 0; forcedSampleEnable < 2; forcedSampleEnable++) - { - centroidTable[sampleCount][baryMask][patternNum][forcedSampleEnable]= - BECentroidBarycentricChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, baryMask, patternNum, forcedSampleEnable); - } - } - } - } } void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[2][2]) { - gBackendSingleSample[0][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NONE, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE); - gBackendSingleSample[0][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NONE, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE); - gBackendSingleSample[1][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NORMAL, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE); - gBackendSingleSample[1][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NORMAL, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE); + gBackendSingleSample[0][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, false, false, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE); + gBackendSingleSample[0][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, false, true, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE); + gBackendSingleSample[1][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, true, false, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE); + gBackendSingleSample[1][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, true, true, false, false,(SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE); } template <SWR_MULTISAMPLE_COUNT numSampleRates, SWR_MSAA_SAMPLE_PATTERN numSamplePatterns, SWR_INPUT_COVERAGE numCoverageModes> @@ -1734,9 +1648,11 @@ void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[numSampleRates][numSamp for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++) { table[sampleCount][samplePattern][inputCoverage][isCentroid][0] = - BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, samplePattern, inputCoverage, isCentroid, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_PIXEL_RATE); + BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, (SWR_MSAA_SAMPLE_PATTERN)samplePattern, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), (isCentroid > 0), + false, false, SWR_BACKEND_MSAA_PIXEL_RATE); table[sampleCount][samplePattern][inputCoverage][isCentroid][1] = - BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, samplePattern, inputCoverage, isCentroid, 1, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_PIXEL_RATE); + BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, (SWR_MSAA_SAMPLE_PATTERN)samplePattern, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), (isCentroid > 0), + true, false, SWR_BACKEND_MSAA_PIXEL_RATE); } } } @@ -1751,9 +1667,9 @@ void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[numSampleRates][numCov for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < numCoverageModes; inputCoverage++) { table[sampleCount][inputCoverage][0] = - BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, inputCoverage, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE); + BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), false, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE); table[sampleCount][inputCoverage][1] = - BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, inputCoverage, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE); + BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), true, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE); } } } @@ -1764,7 +1680,7 @@ void InitBackendFuncTables() InitBackendPixelFuncTable<(SWR_MULTISAMPLE_COUNT)SWR_MULTISAMPLE_TYPE_MAX, SWR_MSAA_SAMPLE_PATTERN_MAX, SWR_INPUT_COVERAGE_MAX>(gBackendPixelRateTable); InitBackendSampleFuncTable<SWR_MULTISAMPLE_TYPE_MAX, SWR_INPUT_COVERAGE_MAX>(gBackendSampleRateTable); InitBackendOMFuncTable<SWR_NUM_RENDERTARGETS+1, SWR_MULTISAMPLE_TYPE_MAX>(gBackendOutputMergerTable); - InitBackendBarycentricsTables<(SWR_MULTISAMPLE_COUNT)(SWR_MULTISAMPLE_TYPE_MAX)>(gPixelBarycentricTable, gSampleBarycentricTable, gCentroidBarycentricTable); + InitBackendBarycentricsTables<(SWR_MULTISAMPLE_COUNT)(SWR_MULTISAMPLE_TYPE_MAX)>(gPixelBarycentricTable, gSampleBarycentricTable); gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS < SWR_MULTISAMPLE_1X > ; gBackendNullPs[SWR_MULTISAMPLE_2X] = &BackendNullPS < SWR_MULTISAMPLE_2X > ; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h index d0626b997af..022e60a9413 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend.h @@ -60,7 +60,7 @@ extern const __m256 vULOffsetsY; #define MASK 0xff #endif -template<SWR_MULTISAMPLE_COUNT sampleCountT, bool bIsStandardPattern, bool bForcedSampleCount> +template<typename T> INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (&inputMask)[KNOB_SIMD_WIDTH], const uint32_t sampleMask) { @@ -69,28 +69,28 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t ( __m256i mask[2]; __m256i sampleCoverage[2]; - if(bIsStandardPattern) + if(T::bIsStandardPattern) { __m256i src = _mm256_set1_epi32(0); __m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1; - if(MultisampleTraits<sampleCountT>::numSamples == 1) + if(T::MultisampleT::numSamples == 1) { mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, -1); } - else if(MultisampleTraits<sampleCountT>::numSamples == 2) + else if(T::MultisampleT::numSamples == 2) { mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1); } - else if(MultisampleTraits<sampleCountT>::numSamples == 4) + else if(T::MultisampleT::numSamples == 4) { mask[0] = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1); } - else if(MultisampleTraits<sampleCountT>::numSamples == 8) + else if(T::MultisampleT::numSamples == 8) { mask[0] = _mm256_set1_epi32(-1); } - else if(MultisampleTraits<sampleCountT>::numSamples == 16) + else if(T::MultisampleT::numSamples == 16) { mask[0] = _mm256_set1_epi32(-1); mask[1] = _mm256_set1_epi32(-1); @@ -99,7 +99,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t ( // gather coverage for samples 0-7 sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8)); - if(MultisampleTraits<sampleCountT>::numSamples > 8) + if(T::MultisampleT::numSamples > 8) { // gather coverage for samples 8-15 sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8)); @@ -109,23 +109,23 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t ( { // center coverage is the same for all samples; just broadcast to the sample slots uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK); - if(MultisampleTraits<sampleCountT>::numSamples == 1) + if(T::MultisampleT::numSamples == 1) { sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage); } - else if(MultisampleTraits<sampleCountT>::numSamples == 2) + else if(T::MultisampleT::numSamples == 2) { sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage); } - else if(MultisampleTraits<sampleCountT>::numSamples == 4) + else if(T::MultisampleT::numSamples == 4) { sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage); } - else if(MultisampleTraits<sampleCountT>::numSamples == 8) + else if(T::MultisampleT::numSamples == 8) { sampleCoverage[0] = _mm256_set1_epi32(centerCoverage); } - else if(MultisampleTraits<sampleCountT>::numSamples == 16) + else if(T::MultisampleT::numSamples == 16) { sampleCoverage[0] = _mm256_set1_epi32(centerCoverage); sampleCoverage[1] = _mm256_set1_epi32(centerCoverage); @@ -138,7 +138,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t ( __m256i packedCoverage0 = _simd_shuffle_epi8(sampleCoverage[0], mask[0]); __m256i packedCoverage1; - if(MultisampleTraits<sampleCountT>::numSamples > 8) + if(T::MultisampleT::numSamples > 8) { // pull out the the 8bit 4x2 coverage for samples 8-15 into the lower 32 bits of each 128bit lane packedCoverage1 = _simd_shuffle_epi8(sampleCoverage[1], mask[0]); @@ -151,7 +151,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t ( packedCoverage0 = _mm256_castps_si256(_mm256_blend_ps(_mm256_castsi256_ps(packedCoverage0), shufRes, 0xFE)); __m256i packedSampleCoverage; - if(MultisampleTraits<sampleCountT>::numSamples > 8) + if(T::MultisampleT::numSamples > 8) { // pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane hiToLow = _mm256_permute2f128_si256(packedCoverage1, packedCoverage1, 0x83); @@ -170,7 +170,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t ( packedCoverage0 = _mm256_permutevar8x32_epi32(packedCoverage0, permMask); __m256i packedSampleCoverage; - if(MultisampleTraits<sampleCountT>::numSamples > 8) + if(T::MultisampleT::numSamples > 8) { permMask = _mm256_set_epi32(0x7, 0x7, 0x7, 0x7, 0x4, 0x0, 0x7, 0x7); // pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane @@ -190,7 +190,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t ( // convert packed sample coverage masks into single coverage masks for all samples for each pixel in the 4x2 inputMask[i] = _simd_movemask_epi8(packedSampleCoverage); - if(!bForcedSampleCount) + if(!T::bForcedSampleCount) { // input coverage has to be anded with sample mask if MSAA isn't forced on inputMask[i] &= sampleMask; @@ -201,10 +201,22 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t ( } } -template<SWR_MULTISAMPLE_COUNT sampleCountT, bool bIsStandardPattern, bool bForcedSampleCount> +template<typename T> INLINE void generateInputCoverage(const uint64_t *const coverageMask, __m256 &inputCoverage, const uint32_t sampleMask) { uint32_t inputMask[KNOB_SIMD_WIDTH]; - generateInputCoverage<sampleCountT, bIsStandardPattern, bForcedSampleCount>(coverageMask, inputMask, sampleMask); + generateInputCoverage<T>(coverageMask, inputMask, sampleMask); inputCoverage = _simd_castsi_ps(_mm256_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0])); } + +template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN, + uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t odepth = 0> +struct SwrBackendTraits +{ + static const bool bIsStandardPattern = (samplePattern == SWR_MSAA_STANDARD_PATTERN); + static const bool bInputCoverage = (coverage == 1); + static const bool bCentroidPos = (centroid == 1); + static const bool bForcedSampleCount = (forced == 1); + static const bool bWritesODepth = (odepth == 1); + typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, (bIsStandardPattern) ? SWR_MSAA_STANDARD_PATTERN : SWR_MSAA_CENTER_PATTERN> MultisampleT; +};
\ No newline at end of file diff --git a/src/gallium/drivers/swr/rasterizer/core/multisample.cpp b/src/gallium/drivers/swr/rasterizer/core/multisample.cpp index d51a546b063..643ba133808 100644 --- a/src/gallium/drivers/swr/rasterizer/core/multisample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/multisample.cpp @@ -49,3 +49,16 @@ const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosX[16] {0.5625, 0.4375, 0.3125, 0.7500, 0.1875, 0.6250, 0.8125, 0.6875, 0.3750, 0.5000, 0.2500, 0.1250, 0.0000, 0.9375, 0.8750, 0.0625}; const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosY[16] {0.5625, 0.3125, 0.6250, 0.4375, 0.3750, 0.8125, 0.6875, 0.1875, 0.8750, 0.0625, 0.1250, 0.7500, 0.5000, 0.2500, 0.9375, 0.0000}; + +const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosX{ 0.5f }; +const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosY{ 0.5f }; +const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosX[2]{ 0.5f, 0.5f}; +const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosY[2]{ 0.5f, 0.5f}; +const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosX[4]{ 0.5f, 0.5f, 0.5f, 0.5f}; +const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosY[4]{ 0.5f, 0.5f, 0.5f, 0.5f }; +const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosX[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; +const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosY[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; +const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosX[16] +{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f }; +const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosY[16] +{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
\ No newline at end of file diff --git a/src/gallium/drivers/swr/rasterizer/core/multisample.h b/src/gallium/drivers/swr/rasterizer/core/multisample.h index 4ae777e2fc5..c5096ed31c7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/multisample.h +++ b/src/gallium/drivers/swr/rasterizer/core/multisample.h @@ -54,7 +54,7 @@ SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples) // hardcoded offsets based on Direct3d standard multisample positions // 8 x 8 pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner // coords are 0.8 fixed point offsets from (0, 0) -template<SWR_MULTISAMPLE_COUNT sampleCount> +template<SWR_MULTISAMPLE_COUNT sampleCount, SWR_MSAA_SAMPLE_PATTERN samplePattern = SWR_MSAA_STANDARD_PATTERN> struct MultisampleTraits { INLINE static __m128i vXi(uint32_t sampleNum) = delete; @@ -74,7 +74,7 @@ struct MultisampleTraits }; template<> -struct MultisampleTraits<SWR_MULTISAMPLE_1X> +struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN> { INLINE static __m128i vXi(uint32_t sampleNum) { @@ -143,10 +143,74 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X> static const float samplePosX; static const float samplePosY; static const uint32_t numSamples = 1; + static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X; + static const uint32_t numCoverageSamples = 1; }; template<> -struct MultisampleTraits<SWR_MULTISAMPLE_2X> +struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN> +{ + INLINE static __m128i vXi(uint32_t sampleNum) + { + return _mm_set1_epi32(0x80); + } + + INLINE static __m128i vYi(uint32_t sampleNum) + { + return _mm_set1_epi32(0x80); + } + + INLINE static simdscalar vX(uint32_t sampleNum) + { + return _simd_set1_ps(0.5f); + } + + INLINE static simdscalar vY(uint32_t sampleNum) + { + return _simd_set1_ps(0.5f); + } + + INLINE static float X(uint32_t sampleNum) {return 0.5f;}; + INLINE static float Y(uint32_t sampleNum) {return 0.5f;}; + + INLINE static __m128i TileSampleOffsetsX() + { + // BR, BL, UR, UL + return _mm_set1_epi32(0x80); + } + + INLINE static __m128i TileSampleOffsetsY() + { + // BR, BL, UR, UL + return _mm_set1_epi32(0x80); + } + + INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum) + { + return 0; + } + + INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum) + { + return 0; + } + + INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum) + { + return 0; + } + + INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);}; + + static const uint32_t numSamples = 1; + static const float samplePosX; + static const float samplePosY; + static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X; + static const uint32_t numCoverageSamples = 1; +}; + +template<> +struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN> { INLINE static __m128i vXi(uint32_t sampleNum) { @@ -238,10 +302,92 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X> static const float samplePosX[2]; static const float samplePosY[2]; static const uint32_t numSamples = 2; + static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X; + static const uint32_t numCoverageSamples = 2; +}; + +template<> +struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN> +{ + INLINE static __m128i vXi(uint32_t sampleNum) + { + return _mm_set1_epi32(0x80); + } + + INLINE static __m128i vYi(uint32_t sampleNum) + { + return _mm_set1_epi32(0x80); + } + + INLINE static simdscalar vX(uint32_t sampleNum) + { + return _simd_set1_ps(0.5f); + } + + INLINE static simdscalar vY(uint32_t sampleNum) + { + return _simd_set1_ps(0.5f); + } + + INLINE static float X(uint32_t sampleNum) {return 0.5f;}; + INLINE static float Y(uint32_t sampleNum) {return 0.5f;}; + + INLINE static __m128i TileSampleOffsetsX() + { + // BR, BL, UR, UL + return _mm_set1_epi32(0x80); + } + + INLINE static __m128i TileSampleOffsetsY() + { + // BR, BL, UR, UL + return _mm_set1_epi32(0x80); + } + + INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileColorOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) + }; + assert(sampleNum < numSamples); + return RasterTileColorOffsets[sampleNum]; + } + + INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileDepthOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) + }; + assert(sampleNum < numSamples); + return RasterTileDepthOffsets[sampleNum]; + } + + INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileStencilOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) + }; + assert(sampleNum < numSamples); + return RasterTileStencilOffsets[sampleNum]; + } + + INLINE static simdscalari FullSampleMask() + { + static const simdscalari mask =_simd_set1_epi32(0x3); + return mask; + } + static const uint32_t numSamples = 2; + static const float samplePosX[2]; + static const float samplePosY[2]; + static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X; + static const uint32_t numCoverageSamples = 1; }; template<> -struct MultisampleTraits<SWR_MULTISAMPLE_4X> +struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN> { INLINE static __m128i vXi(uint32_t sampleNum) { @@ -343,10 +489,98 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X> static const float samplePosX[4]; static const float samplePosY[4]; static const uint32_t numSamples = 4; + static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X; + static const uint32_t numCoverageSamples = 4; +}; + +template<> +struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN> +{ + INLINE static __m128i vXi(uint32_t sampleNum) + { + return _mm_set1_epi32(0x80); + } + + INLINE static __m128i vYi(uint32_t sampleNum) + { + return _mm_set1_epi32(0x80); + } + + INLINE static simdscalar vX(uint32_t sampleNum) + { + return _simd_set1_ps(0.5f); + } + + INLINE static simdscalar vY(uint32_t sampleNum) + { + return _simd_set1_ps(0.5f); + } + + INLINE static float X(uint32_t sampleNum) {return 0.5f;}; + INLINE static float Y(uint32_t sampleNum) {return 0.5f;}; + + INLINE static __m128i TileSampleOffsetsX() + { + // BR, BL, UR, UL + return _mm_set1_epi32(0x80); + } + + INLINE static __m128i TileSampleOffsetsY() + { + // BR, BL, UR, UL + return _mm_set1_epi32(0x80); + } + + INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileColorOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8), + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3, + }; + assert(sampleNum < numSamples); + return RasterTileColorOffsets[sampleNum]; + } + + INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileDepthOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8), + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3, + }; + assert(sampleNum < numSamples); + return RasterTileDepthOffsets[sampleNum]; + } + + INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileStencilOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8), + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3, + }; + assert(sampleNum < numSamples); + return RasterTileStencilOffsets[sampleNum]; + } + + INLINE static simdscalari FullSampleMask() + { + static const simdscalari mask = _simd_set1_epi32(0xF); + return mask; + } + static const uint32_t numSamples = 4; + static const float samplePosX[4]; + static const float samplePosY[4]; + static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X; + static const uint32_t numCoverageSamples = 1; }; template<> -struct MultisampleTraits<SWR_MULTISAMPLE_8X> +struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN> { INLINE static __m128i vXi(uint32_t sampleNum) { @@ -464,10 +698,110 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X> static const float samplePosX[8]; static const float samplePosY[8]; static const uint32_t numSamples = 8; + static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X; + static const uint32_t numCoverageSamples = 8; }; template<> -struct MultisampleTraits<SWR_MULTISAMPLE_16X> +struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN> +{ + INLINE static __m128i vXi(uint32_t sampleNum) + { + return _mm_set1_epi32(0x80); + } + + INLINE static __m128i vYi(uint32_t sampleNum) + { + return _mm_set1_epi32(0x80); + } + + INLINE static simdscalar vX(uint32_t sampleNum) + { + return _simd_set1_ps(0.5f); + } + + INLINE static simdscalar vY(uint32_t sampleNum) + { + return _simd_set1_ps(0.5f); + } + + INLINE static float X(uint32_t sampleNum) {return 0.5f;}; + INLINE static float Y(uint32_t sampleNum) {return 0.5f;}; + + INLINE static __m128i TileSampleOffsetsX() + { + // BR, BL, UR, UL + return _mm_set1_epi32(0x80); + } + + INLINE static __m128i TileSampleOffsetsY() + { + // BR, BL, UR, UL + return _mm_set1_epi32(0x80); + } + + INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileColorOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8), + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7, + }; + assert(sampleNum < numSamples); + return RasterTileColorOffsets[sampleNum]; + } + + INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileDepthOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8), + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7, + }; + assert(sampleNum < numSamples); + return RasterTileDepthOffsets[sampleNum]; + } + + INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileStencilOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8), + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 4, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 5, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 6, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 7, + }; + assert(sampleNum < numSamples); + return RasterTileStencilOffsets[sampleNum]; + } + + INLINE static simdscalari FullSampleMask() + { + static const simdscalari mask = _simd_set1_epi32(0xFF); + return mask; + } + static const uint32_t numSamples = 8; + static const float samplePosX[8]; + static const float samplePosY[8]; + static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X; + static const uint32_t numCoverageSamples = 1; +}; + +template<> +struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN> { INLINE static __m128i vXi(uint32_t sampleNum) { @@ -617,4 +951,128 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X> static const float samplePosX[16]; static const float samplePosY[16]; static const uint32_t numSamples = 16; + static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X; + static const uint32_t numCoverageSamples = 16; +}; + +template<> +struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN> +{ + INLINE static __m128i vXi(uint32_t sampleNum) + { + return _mm_set1_epi32(0x80); + } + + INLINE static __m128i vYi(uint32_t sampleNum) + { + return _mm_set1_epi32(0x80); + } + + INLINE static simdscalar vX(uint32_t sampleNum) + { + return _simd_set1_ps(0.5f); + } + + INLINE static simdscalar vY(uint32_t sampleNum) + { + return _simd_set1_ps(0.5f); + } + + INLINE static float X(uint32_t sampleNum) {return 0.5f;}; + INLINE static float Y(uint32_t sampleNum) {return 0.5f;}; + + INLINE static __m128i TileSampleOffsetsX() + { + // BR, BL, UR, UL + return _mm_set1_epi32(0x80); + } + + INLINE static __m128i TileSampleOffsetsY() + { + // BR, BL, UR, UL + return _mm_set1_epi32(0x80); + } + + INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileColorOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8), + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 8, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 9, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 10, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 11, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 12, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 13, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 14, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 15, + }; + assert(sampleNum < numSamples); + return RasterTileColorOffsets[sampleNum]; + } + + INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileDepthOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8), + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 8, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 9, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 10, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 11, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 12, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 13, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 14, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 15, + }; + assert(sampleNum < numSamples); + return RasterTileDepthOffsets[sampleNum]; + } + + INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum) + { + static const uint32_t RasterTileStencilOffsets[numSamples] + { 0, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8), + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 4, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 5, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 6, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 7, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 8, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 9, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 10, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 11, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 12, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 13, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 14, + (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 15, + }; + assert(sampleNum < numSamples); + return RasterTileStencilOffsets[sampleNum]; + } + + INLINE static simdscalari FullSampleMask() + { + static const simdscalari mask = _simd_set1_epi32(0xFFFF); + return mask; + } + static const uint32_t numSamples = 16; + static const float samplePosX[16]; + static const float samplePosY[16]; + static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X; + static const uint32_t numCoverageSamples = 1; }; |