summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorTim Rowley <[email protected]>2016-04-14 17:03:16 -0600
committerTim Rowley <[email protected]>2016-04-27 10:40:44 -0500
commit4e1e0b3a32a129a785bddc193cf105be75c32847 (patch)
tree82e9f195ece7ce2ba3cf2bf0487ee9856b6cd06f /src/gallium
parent43f46caf769b0a6d661850ce2fa64b6f4b085b9f (diff)
swr: [rasterizer core] backend refactor
Lump all template args into a bundle of traits, and add some functionality to the MSAA traits. Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp4
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.cpp334
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.h50
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/multisample.cpp13
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/multisample.h470
5 files changed, 633 insertions, 238 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index e950e92c874..3b02d197111 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -763,7 +763,6 @@ extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_IN
extern PFN_OUTPUT_MERGER gBackendOutputMergerTable[SWR_NUM_RENDERTARGETS + 1][SWR_MULTISAMPLE_TYPE_MAX];
extern PFN_CALC_PIXEL_BARYCENTRICS gPixelBarycentricTable[2];
extern PFN_CALC_SAMPLE_BARYCENTRICS gSampleBarycentricTable[2];
-extern PFN_CALC_CENTROID_BARYCENTRICS gCentroidBarycentricTable[SWR_MULTISAMPLE_TYPE_MAX][2][2][2];
void SetupPipeline(DRAW_CONTEXT *pDC)
{
DRAW_STATE* pState = pDC->pState;
@@ -827,9 +826,6 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
bBarycentrics = (barycentricsMask & SWR_BARYCENTRIC_PER_SAMPLE_MASK) > 0 ? 1 : 0;
backendFuncs.pfnCalcSampleBarycentrics = gSampleBarycentricTable[bBarycentrics];
-
- bBarycentrics = (barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0 ? 1 : 0;
- backendFuncs.pfnCalcCentroidBarycentrics = gCentroidBarycentricTable[rastState.sampleCount][bBarycentrics][rastState.samplePattern][forcedSampleCount];
}
PFN_PROCESS_PRIMS pfnBinner;
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index a2212ba8aa4..310a7edcde1 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -459,10 +459,10 @@ simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscala
return _simd_movemask_ps(vClipMask);
}
-template<bool perspMask>
+template<bool bGenerateBarycentrics>
INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext)
{
- if(perspMask)
+ if(bGenerateBarycentrics)
{
// evaluate I,J
psContext.vI.center = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.center, psContext.vY.center);
@@ -475,10 +475,10 @@ INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEX
}
}
-template<bool perspMask>
+template<bool bGenerateBarycentrics>
INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext)
{
- if(perspMask)
+ if(bGenerateBarycentrics)
{
// evaluate I,J
psContext.vI.sample = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.sample, psContext.vY.sample);
@@ -502,13 +502,12 @@ INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTE
// evaluated as follows : If the SampleMask Rasterizer state is a subset of the samples in the pixel, then the first sample covered by the
// SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-template<SWR_MULTISAMPLE_COUNT sampleCount, bool bForcedSampleCount>
+template<typename T>
INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const coverageMask, const uint32_t sampleMask,
const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
{
uint32_t inputMask[KNOB_SIMD_WIDTH];
-
- generateInputCoverage<sampleCount, 1, bForcedSampleCount>(coverageMask, inputMask, sampleMask);
+ generateInputCoverage<T>(coverageMask, inputMask, sampleMask);
// Case (2) - partially covered pixel
@@ -524,29 +523,29 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
(inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
// look up and set the sample offsets from UL pixel corner for first covered sample
- __m256 vXSample = _mm256_set_ps(MultisampleTraits<sampleCount>::X(sampleNum[7]),
- MultisampleTraits<sampleCount>::X(sampleNum[6]),
- MultisampleTraits<sampleCount>::X(sampleNum[5]),
- MultisampleTraits<sampleCount>::X(sampleNum[4]),
- MultisampleTraits<sampleCount>::X(sampleNum[3]),
- MultisampleTraits<sampleCount>::X(sampleNum[2]),
- MultisampleTraits<sampleCount>::X(sampleNum[1]),
- MultisampleTraits<sampleCount>::X(sampleNum[0]));
-
- __m256 vYSample = _mm256_set_ps(MultisampleTraits<sampleCount>::Y(sampleNum[7]),
- MultisampleTraits<sampleCount>::Y(sampleNum[6]),
- MultisampleTraits<sampleCount>::Y(sampleNum[5]),
- MultisampleTraits<sampleCount>::Y(sampleNum[4]),
- MultisampleTraits<sampleCount>::Y(sampleNum[3]),
- MultisampleTraits<sampleCount>::Y(sampleNum[2]),
- MultisampleTraits<sampleCount>::Y(sampleNum[1]),
- MultisampleTraits<sampleCount>::Y(sampleNum[0]));
+ __m256 vXSample = _mm256_set_ps(T::MultisampleT::X(sampleNum[7]),
+ T::MultisampleT::X(sampleNum[6]),
+ T::MultisampleT::X(sampleNum[5]),
+ T::MultisampleT::X(sampleNum[4]),
+ T::MultisampleT::X(sampleNum[3]),
+ T::MultisampleT::X(sampleNum[2]),
+ T::MultisampleT::X(sampleNum[1]),
+ T::MultisampleT::X(sampleNum[0]));
+
+ __m256 vYSample = _mm256_set_ps(T::MultisampleT::Y(sampleNum[7]),
+ T::MultisampleT::Y(sampleNum[6]),
+ T::MultisampleT::Y(sampleNum[5]),
+ T::MultisampleT::Y(sampleNum[4]),
+ T::MultisampleT::Y(sampleNum[3]),
+ T::MultisampleT::Y(sampleNum[2]),
+ T::MultisampleT::Y(sampleNum[1]),
+ T::MultisampleT::Y(sampleNum[0]));
// add sample offset to UL pixel corner
vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
// Case (1) and case (3b) - All samples covered or not covered with full SampleMask
- static const __m256i vFullyCoveredMask = MultisampleTraits<sampleCount>::FullSampleMask();
+ static const __m256i vFullyCoveredMask = T::MultisampleT::FullSampleMask();
__m256i vInputCoveragei = _mm256_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0]);
__m256i vAllSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vFullyCoveredMask);
@@ -570,46 +569,38 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
__m256i vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
- vXSample = _simd_set1_ps(MultisampleTraits<sampleCount>::X(firstCoveredSampleMaskSample));
- vYSample = _simd_set1_ps(MultisampleTraits<sampleCount>::Y(firstCoveredSampleMaskSample));
+ vXSample = _simd_set1_ps(T::MultisampleT::X(firstCoveredSampleMaskSample));
+ vYSample = _simd_set1_ps(T::MultisampleT::Y(firstCoveredSampleMaskSample));
// blend in case 3a pixel locations
psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
psContext.vY.centroid = _simd_blendv_ps(psContext.vY.centroid, vYSample, _simd_castsi_ps(vCase3a));
}
-template<uint32_t sampleCount, uint32_t persp, uint32_t standardPattern, uint32_t forcedMultisampleCount>
+template<typename T>
INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext,
const uint64_t *const coverageMask, const uint32_t sampleMask,
const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
{
- static const bool bPersp = (bool)persp;
- static const bool bIsStandardPattern = (bool)standardPattern;
- static const bool bForcedMultisampleCount = (bool)forcedMultisampleCount;
-
- // calculate centroid positions
- if(bPersp)
+ if(T::bIsStandardPattern)
{
- if(bIsStandardPattern)
- {
- ///@ todo: don't need to generate input coverage 2x if input coverage and centroid
- CalcCentroidPos<(SWR_MULTISAMPLE_COUNT)sampleCount, bForcedMultisampleCount>(psContext, coverageMask, sampleMask, vXSamplePosUL, vYSamplePosUL);
- }
- else
- {
- static const __m256 pixelCenter = _simd_set1_ps(0.5f);
- psContext.vX.centroid = _simd_add_ps(vXSamplePosUL, pixelCenter);
- psContext.vY.centroid = _simd_add_ps(vYSamplePosUL, pixelCenter);
- }
- // evaluate I,J
- psContext.vI.centroid = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid);
- psContext.vJ.centroid = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid);
- psContext.vI.centroid = _simd_mul_ps(psContext.vI.centroid, coeffs.vRecipDet);
- psContext.vJ.centroid = _simd_mul_ps(psContext.vJ.centroid, coeffs.vRecipDet);
-
- // interpolate 1/w
- psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid);
+ ///@ todo: don't need to generate input coverage 2x if input coverage and centroid
+ CalcCentroidPos<T>(psContext, coverageMask, sampleMask, vXSamplePosUL, vYSamplePosUL);
}
+ else
+ {
+ static const __m256 pixelCenter = _simd_set1_ps(0.5f);
+ psContext.vX.centroid = _simd_add_ps(vXSamplePosUL, pixelCenter);
+ psContext.vY.centroid = _simd_add_ps(vYSamplePosUL, pixelCenter);
+ }
+ // evaluate I,J
+ psContext.vI.centroid = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid);
+ psContext.vJ.centroid = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid);
+ psContext.vI.centroid = _simd_mul_ps(psContext.vI.centroid, coeffs.vRecipDet);
+ psContext.vJ.centroid = _simd_mul_ps(psContext.vJ.centroid, coeffs.vRecipDet);
+
+ // interpolate 1/w
+ psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid);
}
template<uint32_t NumRT, uint32_t sampleCountT>
@@ -680,13 +671,10 @@ void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_REND
}
}
-template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount>
+template<typename T>
void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
RDTSC_START(BESetup);
- // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
- static const bool bInputCoverage = (bool)inputCoverage;
- static const bool bCentroidPos = (bool)centroidPos;
SWR_CONTEXT *pContext = pDC->pContext;
const API_STATE& state = GetApiState(pDC);
@@ -736,8 +724,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
psContext.J = work.J;
psContext.recipDet = work.recipDet;
psContext.pRecipW = work.pRecipW;
- psContext.pSamplePosX = (const float*)&MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosX;
- psContext.pSamplePosY = (const float*)&MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosY;
+ psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX;
+ psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY;
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
@@ -748,9 +736,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
{
- if(bInputCoverage)
+ if(T::bInputCoverage)
{
- generateInputCoverage<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, false>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
+ generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
}
if(coverageMask & MASK)
@@ -762,7 +750,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
- if(bCentroidPos)
+ if(T::bCentroidPos)
{
// for 1x case, centroid is pixel center
psContext.vX.centroid = psContext.vX.center;
@@ -873,14 +861,9 @@ Endtile:
}
}
-template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount>
+template<typename T>
void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
- static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT;
- static const bool bInputCoverage = (bool)inputCoverage;
- static const bool bCentroidPos = (bool)centroidPos;
-
RDTSC_START(BESetup);
SWR_CONTEXT *pContext = pDC->pContext;
@@ -930,9 +913,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
psContext.I = work.I;
psContext.J = work.J;
psContext.recipDet = work.recipDet;
- psContext.pSamplePosX = (const float*)&MultisampleTraits<sampleCount>::samplePosX;
- psContext.pSamplePosY = (const float*)&MultisampleTraits<sampleCount>::samplePosY;
- const uint32_t numSamples = MultisampleTraits<sampleCount>::numSamples;
+ psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX;
+ psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY;
+ const uint32_t numSamples = T::MultisampleT::numSamples;
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
@@ -951,16 +934,16 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
RDTSC_STOP(BEBarycentric, 0, 0);
- if(bInputCoverage)
+ if(T::bInputCoverage)
{
- generateInputCoverage<sampleCount, SWR_MSAA_STANDARD_PATTERN, false>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
+ generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
}
- if(bCentroidPos)
+ if(T::bCentroidPos)
{
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
RDTSC_START(BEBarycentric);
- backendFuncs.pfnCalcCentroidBarycentrics(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
+ CalcCentroidBarycentrics<T>(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
RDTSC_STOP(BEBarycentric, 0, 0);
}
@@ -971,8 +954,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
RDTSC_START(BEBarycentric);
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(psContext.vX.UL, MultisampleTraits<sampleCount>::vX(sample));
- psContext.vY.sample = _simd_add_ps(psContext.vY.UL, MultisampleTraits<sampleCount>::vY(sample));
+ psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
+ psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
simdmask coverageMask = work.coverageMask[sample] & MASK;
simdscalar vCoverageMask = vMask(coverageMask);
@@ -996,8 +979,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
simdscalar stencilPassMask = vCoverageMask;
// offset depth/stencil buffers current sample
- uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
- uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
+ uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample);
+ uint8_t *pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample);
// Early-Z?
if (CanEarlyZ(pPSState))
@@ -1032,7 +1015,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
- //// late-Z
+ // late-Z
if (!CanEarlyZ(pPSState))
{
RDTSC_START(BELateDepthTest);
@@ -1083,16 +1066,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
}
}
-template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount>
+template<typename T>
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
- static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT;
- static const bool bIsStandardPattern = (bool)samplePattern;
- static const bool bInputCoverage = (bool)inputCoverage;
- static const bool bCentroidPos = (bool)centroidPos;
- static const bool bForcedSampleCount = (bool)forcedSampleCount;
-
RDTSC_START(BESetup);
SWR_CONTEXT *pContext = pDC->pContext;
@@ -1141,35 +1117,25 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
psContext.I = work.I;
psContext.J = work.J;
psContext.recipDet = work.recipDet;
- psContext.pSamplePosX = (const float*)&MultisampleTraits<sampleCount>::samplePosX;
- psContext.pSamplePosY = (const float*)&MultisampleTraits<sampleCount>::samplePosY;
+ psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX;
+ psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY;
psContext.sampleIndex = 0;
- uint32_t numCoverageSamples;
- if(bIsStandardPattern)
- {
- numCoverageSamples = MultisampleTraits<sampleCount>::numSamples;
- }
- else
- {
- numCoverageSamples = 1;
- }
-
uint32_t numOMSamples;
// RT has to be single sample if we're in forcedMSAA mode
- if(bForcedSampleCount && (sampleCount > SWR_MULTISAMPLE_1X))
+ if(T::bForcedSampleCount && (T::MultisampleT::sampleCount > SWR_MULTISAMPLE_1X))
{
numOMSamples = 1;
}
// unless we're forced to single sample, in which case we run the OM at the sample count of the RT
- else if(bForcedSampleCount && (sampleCount == SWR_MULTISAMPLE_1X))
+ else if(T::bForcedSampleCount && (T::MultisampleT::sampleCount == SWR_MULTISAMPLE_1X))
{
numOMSamples = GetNumSamples(pBlendState->sampleCount);
}
// else we're in normal MSAA mode and rasterizer and OM are running at the same sample count
else
{
- numOMSamples = MultisampleTraits<sampleCount>::numSamples;
+ numOMSamples = T::MultisampleT::numSamples;
}
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
@@ -1178,21 +1144,21 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps((float)yy));
for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
{
- simdscalar vZ[MultisampleTraits<sampleCount>::numSamples]{ 0 };
+ simdscalar vZ[T::MultisampleT::numSamples]{ 0 };
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps((float)xx));
// set pixel center positions
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
- if (bInputCoverage)
+ if (T::bInputCoverage)
{
- generateInputCoverage<sampleCount, bIsStandardPattern, bForcedSampleCount>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
+ generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
}
- if(bCentroidPos)
+ if(T::bCentroidPos)
{
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
RDTSC_START(BEBarycentric);
- backendFuncs.pfnCalcCentroidBarycentrics(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
+ CalcCentroidBarycentrics<T>(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
RDTSC_STOP(BEBarycentric, 0, 0);
}
@@ -1219,12 +1185,12 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
}
// need to declare enough space for all samples
- simdscalar vCoverageMask[MultisampleTraits<sampleCount>::numSamples];
- simdscalar depthPassMask[MultisampleTraits<sampleCount>::numSamples];
- simdscalar stencilPassMask[MultisampleTraits<sampleCount>::numSamples];
+ simdscalar vCoverageMask[T::MultisampleT::numSamples];
+ simdscalar depthPassMask[T::MultisampleT::numSamples];
+ simdscalar stencilPassMask[T::MultisampleT::numSamples];
simdscalar anyDepthSamplePassed = _simd_setzero_ps();
simdscalar anyStencilSamplePassed = _simd_setzero_ps();
- for(uint32_t sample = 0; sample < numCoverageSamples; sample++)
+ for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
vCoverageMask[sample] = vMask(work.coverageMask[sample] & MASK);
@@ -1237,7 +1203,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
continue;
}
- if(bForcedSampleCount)
+ if(T::bForcedSampleCount)
{
// candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set
const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(pBlendState->sampleMask), _simd_setzero_si()));
@@ -1252,11 +1218,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
if(!pPSState->writesODepth || rastState.clipDistanceMask)
{
RDTSC_START(BEBarycentric);
- if(bIsStandardPattern)
+ if(T::bIsStandardPattern)
{
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(psContext.vX.UL, MultisampleTraits<sampleCount>::vX(sample));
- psContext.vY.sample = _simd_add_ps(psContext.vY.UL, MultisampleTraits<sampleCount>::vY(sample));
+ psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
+ psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
}
else
{
@@ -1291,8 +1257,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
}
// offset depth/stencil buffers current sample
- uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
- uint8_t * pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
+ uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample);
+ uint8_t * pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample);
// ZTest for this sample
RDTSC_START(BEEarlyDepthTest);
@@ -1332,8 +1298,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// loop over all samples, broadcasting the results of the PS to all passing pixels
for(uint32_t sample = 0; sample < numOMSamples; sample++)
{
- uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
- uint8_t * pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
+ uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample);
+ uint8_t * pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample);
// output merger
RDTSC_START(BEOutputMerger);
@@ -1346,12 +1312,12 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// forcedSampleCount outputs to any pixels with covered samples not masked off by SampleMask
// depth test is disabled, so just set the z val to 0.
- if(bForcedSampleCount)
+ if(T::bForcedSampleCount)
{
coverageMaskSample = depthMaskSample = anyDepthSamplePassed;
vInterpolatedZ = _simd_setzero_ps();
}
- else if(bIsStandardPattern)
+ else if(T::bIsStandardPattern)
{
if(!_simd_movemask_ps(depthPassMask[sample]))
{
@@ -1393,7 +1359,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
Endtile:
RDTSC_START(BEEndTile);
- for(uint32_t sample = 0; sample < numCoverageSamples; sample++)
+ for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
@@ -1413,10 +1379,10 @@ Endtile:
template<uint32_t sampleCountT>
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
+ ///@todo: handle center multisample pattern
+ typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
RDTSC_START(BESetup);
- static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT;
-
SWR_CONTEXT *pContext = pDC->pContext;
const API_STATE& state = GetApiState(pDC);
const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
@@ -1464,8 +1430,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
{
RDTSC_START(BEBarycentric);
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(vXSamplePosUL, MultisampleTraits<sampleCount>::vX(sample));
- psContext.vY.sample = _simd_add_ps(vYSamplePosUL, MultisampleTraits<sampleCount>::vY(sample));
+ psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
+ psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
@@ -1486,8 +1452,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
simdscalar stencilPassMask = vCoverageMask;
// offset depth/stencil buffers current sample
- uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
- uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
+ uint8_t *pDepthSample = pDepthBase + T::MultisampleT::RasterTileDepthOffset(sample);
+ uint8_t *pStencilSample = pStencilBase + T::MultisampleT::RasterTileStencilOffset(sample);
RDTSC_START(BEEarlyDepthTest);
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
@@ -1526,7 +1492,6 @@ PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COV
PFN_OUTPUT_MERGER gBackendOutputMergerTable[SWR_NUM_RENDERTARGETS+1][SWR_MULTISAMPLE_TYPE_MAX] = {};
PFN_CALC_PIXEL_BARYCENTRICS gPixelBarycentricTable[2] = {};
PFN_CALC_SAMPLE_BARYCENTRICS gSampleBarycentricTable[2] = {};
-PFN_CALC_CENTROID_BARYCENTRICS gCentroidBarycentricTable[SWR_MULTISAMPLE_TYPE_MAX][2][2][2] = {};
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
// arguments to static template arguments.
@@ -1576,34 +1541,18 @@ struct OMChooser
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
// arguments to static template arguments.
template <uint32_t... ArgsT>
-struct BECentroidBarycentricChooser
+struct BEChooser
{
-
// Last Arg Terminator
- template <typename... TArgsT>
- static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(uint32_t tArg)
- {
- if(tArg > 0)
- {
- return CalcCentroidBarycentrics<ArgsT..., 1>;
- }
-
- return CalcCentroidBarycentrics<ArgsT..., 0>;
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
+ static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
{
switch(tArg)
{
- case SWR_MULTISAMPLE_1X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_2X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_4X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_8X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_16X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
+ case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<SwrBackendTraits<ArgsT...>>; break;
+ case SWR_BACKEND_MSAA_PIXEL_RATE: return BackendPixelRate<SwrBackendTraits<ArgsT...>>; break;
+ case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<SwrBackendTraits<ArgsT...>>; break;
default:
- SWR_ASSERT(0 && "Invalid sample count\n");
+ SWR_ASSERT(0 && "Invalid backend func\n");
return nullptr;
break;
}
@@ -1611,38 +1560,19 @@ struct BECentroidBarycentricChooser
// Recursively parse args
template <typename... TArgsT>
- static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(uint32_t tArg, TArgsT... remainingArgs)
- {
- if(tArg > 0)
- {
- return BECentroidBarycentricChooser<ArgsT..., 1>::GetFunc(remainingArgs...);
- }
-
- return BECentroidBarycentricChooser<ArgsT..., 0>::GetFunc(remainingArgs...);
- }
-};
-
-// Recursive template used to auto-nest conditionals. Converts dynamic enum function
-// arguments to static template arguments.
-template <uint32_t... ArgsT>
-struct BEChooser
-{
- // Last Arg Terminator
- static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
+ static PFN_BACKEND_FUNC GetFunc(SWR_MSAA_SAMPLE_PATTERN tArg, TArgsT... remainingArgs)
{
switch(tArg)
{
- case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<ArgsT...>; break;
- case SWR_BACKEND_MSAA_PIXEL_RATE: return BackendPixelRate<ArgsT...>; break;
- case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<ArgsT...>; break;
+ case SWR_MSAA_CENTER_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_CENTER_PATTERN>::GetFunc(remainingArgs...); break;
+ case SWR_MSAA_STANDARD_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...); break;
default:
- SWR_ASSERT(0 && "Invalid backend func\n");
- return nullptr;
- break;
+ SWR_ASSERT(0 && "Invalid sample pattern\n");
+ return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...);
+ break;
}
}
-
// Recursively parse args
template <typename... TArgsT>
static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
@@ -1655,22 +1585,22 @@ struct BEChooser
case SWR_MULTISAMPLE_8X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
case SWR_MULTISAMPLE_16X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
default:
- SWR_ASSERT(0 && "Invalid sample count\n");
- return nullptr;
- break;
+ SWR_ASSERT(0 && "Invalid sample count\n");
+ return BEChooser<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
+ break;
}
}
// Recursively parse args
template <typename... TArgsT>
- static PFN_BACKEND_FUNC GetFunc(uint32_t tArg, TArgsT... remainingArgs)
+ static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
{
- if(tArg > 0)
+ if(tArg == true)
{
return BEChooser<ArgsT..., 1>::GetFunc(remainingArgs...);
}
- return BEChooser<ArgsT..., 0>::GetFunc(remainingArgs...);
+ return BEChooser<ArgsT..., 0>::GetFunc(remainingArgs...);
}
};
@@ -1689,37 +1619,21 @@ void InitBackendOMFuncTable(PFN_OUTPUT_MERGER (&table)[numRenderTargets][numSamp
template <SWR_MULTISAMPLE_COUNT numSampleRates>
void InitBackendBarycentricsTables(PFN_CALC_PIXEL_BARYCENTRICS (&pixelTable)[2],
- PFN_CALC_SAMPLE_BARYCENTRICS (&sampleTable)[2],
- PFN_CALC_CENTROID_BARYCENTRICS (&centroidTable)[numSampleRates][2][2][2])
+ PFN_CALC_SAMPLE_BARYCENTRICS (&sampleTable)[2])
{
pixelTable[0] = CalcPixelBarycentrics<0>;
pixelTable[1] = CalcPixelBarycentrics<1>;
sampleTable[0] = CalcSampleBarycentrics<0>;
sampleTable[1] = CalcSampleBarycentrics<1>;
-
- for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < numSampleRates; sampleCount++)
- {
- for(uint32_t baryMask = 0; baryMask < 2; baryMask++)
- {
- for(uint32_t patternNum = 0; patternNum < 2; patternNum++)
- {
- for(uint32_t forcedSampleEnable = 0; forcedSampleEnable < 2; forcedSampleEnable++)
- {
- centroidTable[sampleCount][baryMask][patternNum][forcedSampleEnable]=
- BECentroidBarycentricChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, baryMask, patternNum, forcedSampleEnable);
- }
- }
- }
- }
}
void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[2][2])
{
- gBackendSingleSample[0][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NONE, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
- gBackendSingleSample[0][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NONE, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
- gBackendSingleSample[1][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NORMAL, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
- gBackendSingleSample[1][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NORMAL, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
+ gBackendSingleSample[0][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, false, false, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
+ gBackendSingleSample[0][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, false, true, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
+ gBackendSingleSample[1][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, true, false, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
+ gBackendSingleSample[1][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, true, true, false, false,(SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
}
template <SWR_MULTISAMPLE_COUNT numSampleRates, SWR_MSAA_SAMPLE_PATTERN numSamplePatterns, SWR_INPUT_COVERAGE numCoverageModes>
@@ -1734,9 +1648,11 @@ void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[numSampleRates][numSamp
for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
{
table[sampleCount][samplePattern][inputCoverage][isCentroid][0] =
- BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, samplePattern, inputCoverage, isCentroid, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_PIXEL_RATE);
+ BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, (SWR_MSAA_SAMPLE_PATTERN)samplePattern, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), (isCentroid > 0),
+ false, false, SWR_BACKEND_MSAA_PIXEL_RATE);
table[sampleCount][samplePattern][inputCoverage][isCentroid][1] =
- BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, samplePattern, inputCoverage, isCentroid, 1, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_PIXEL_RATE);
+ BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, (SWR_MSAA_SAMPLE_PATTERN)samplePattern, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), (isCentroid > 0),
+ true, false, SWR_BACKEND_MSAA_PIXEL_RATE);
}
}
}
@@ -1751,9 +1667,9 @@ void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[numSampleRates][numCov
for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < numCoverageModes; inputCoverage++)
{
table[sampleCount][inputCoverage][0] =
- BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, inputCoverage, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
+ BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), false, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
table[sampleCount][inputCoverage][1] =
- BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, inputCoverage, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
+ BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL), true, false, false, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
}
}
}
@@ -1764,7 +1680,7 @@ void InitBackendFuncTables()
InitBackendPixelFuncTable<(SWR_MULTISAMPLE_COUNT)SWR_MULTISAMPLE_TYPE_MAX, SWR_MSAA_SAMPLE_PATTERN_MAX, SWR_INPUT_COVERAGE_MAX>(gBackendPixelRateTable);
InitBackendSampleFuncTable<SWR_MULTISAMPLE_TYPE_MAX, SWR_INPUT_COVERAGE_MAX>(gBackendSampleRateTable);
InitBackendOMFuncTable<SWR_NUM_RENDERTARGETS+1, SWR_MULTISAMPLE_TYPE_MAX>(gBackendOutputMergerTable);
- InitBackendBarycentricsTables<(SWR_MULTISAMPLE_COUNT)(SWR_MULTISAMPLE_TYPE_MAX)>(gPixelBarycentricTable, gSampleBarycentricTable, gCentroidBarycentricTable);
+ InitBackendBarycentricsTables<(SWR_MULTISAMPLE_COUNT)(SWR_MULTISAMPLE_TYPE_MAX)>(gPixelBarycentricTable, gSampleBarycentricTable);
gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS < SWR_MULTISAMPLE_1X > ;
gBackendNullPs[SWR_MULTISAMPLE_2X] = &BackendNullPS < SWR_MULTISAMPLE_2X > ;
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index d0626b997af..022e60a9413 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -60,7 +60,7 @@ extern const __m256 vULOffsetsY;
#define MASK 0xff
#endif
-template<SWR_MULTISAMPLE_COUNT sampleCountT, bool bIsStandardPattern, bool bForcedSampleCount>
+template<typename T>
INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (&inputMask)[KNOB_SIMD_WIDTH], const uint32_t sampleMask)
{
@@ -69,28 +69,28 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
__m256i mask[2];
__m256i sampleCoverage[2];
- if(bIsStandardPattern)
+ if(T::bIsStandardPattern)
{
__m256i src = _mm256_set1_epi32(0);
__m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
- if(MultisampleTraits<sampleCountT>::numSamples == 1)
+ if(T::MultisampleT::numSamples == 1)
{
mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
}
- else if(MultisampleTraits<sampleCountT>::numSamples == 2)
+ else if(T::MultisampleT::numSamples == 2)
{
mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
}
- else if(MultisampleTraits<sampleCountT>::numSamples == 4)
+ else if(T::MultisampleT::numSamples == 4)
{
mask[0] = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
}
- else if(MultisampleTraits<sampleCountT>::numSamples == 8)
+ else if(T::MultisampleT::numSamples == 8)
{
mask[0] = _mm256_set1_epi32(-1);
}
- else if(MultisampleTraits<sampleCountT>::numSamples == 16)
+ else if(T::MultisampleT::numSamples == 16)
{
mask[0] = _mm256_set1_epi32(-1);
mask[1] = _mm256_set1_epi32(-1);
@@ -99,7 +99,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
// gather coverage for samples 0-7
sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8));
- if(MultisampleTraits<sampleCountT>::numSamples > 8)
+ if(T::MultisampleT::numSamples > 8)
{
// gather coverage for samples 8-15
sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
@@ -109,23 +109,23 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
{
// center coverage is the same for all samples; just broadcast to the sample slots
uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
- if(MultisampleTraits<sampleCountT>::numSamples == 1)
+ if(T::MultisampleT::numSamples == 1)
{
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
}
- else if(MultisampleTraits<sampleCountT>::numSamples == 2)
+ else if(T::MultisampleT::numSamples == 2)
{
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
}
- else if(MultisampleTraits<sampleCountT>::numSamples == 4)
+ else if(T::MultisampleT::numSamples == 4)
{
sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
}
- else if(MultisampleTraits<sampleCountT>::numSamples == 8)
+ else if(T::MultisampleT::numSamples == 8)
{
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
}
- else if(MultisampleTraits<sampleCountT>::numSamples == 16)
+ else if(T::MultisampleT::numSamples == 16)
{
sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
@@ -138,7 +138,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
__m256i packedCoverage0 = _simd_shuffle_epi8(sampleCoverage[0], mask[0]);
__m256i packedCoverage1;
- if(MultisampleTraits<sampleCountT>::numSamples > 8)
+ if(T::MultisampleT::numSamples > 8)
{
// pull out the the 8bit 4x2 coverage for samples 8-15 into the lower 32 bits of each 128bit lane
packedCoverage1 = _simd_shuffle_epi8(sampleCoverage[1], mask[0]);
@@ -151,7 +151,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
packedCoverage0 = _mm256_castps_si256(_mm256_blend_ps(_mm256_castsi256_ps(packedCoverage0), shufRes, 0xFE));
__m256i packedSampleCoverage;
- if(MultisampleTraits<sampleCountT>::numSamples > 8)
+ if(T::MultisampleT::numSamples > 8)
{
// pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane
hiToLow = _mm256_permute2f128_si256(packedCoverage1, packedCoverage1, 0x83);
@@ -170,7 +170,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
packedCoverage0 = _mm256_permutevar8x32_epi32(packedCoverage0, permMask);
__m256i packedSampleCoverage;
- if(MultisampleTraits<sampleCountT>::numSamples > 8)
+ if(T::MultisampleT::numSamples > 8)
{
permMask = _mm256_set_epi32(0x7, 0x7, 0x7, 0x7, 0x4, 0x0, 0x7, 0x7);
// pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane
@@ -190,7 +190,7 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
// convert packed sample coverage masks into single coverage masks for all samples for each pixel in the 4x2
inputMask[i] = _simd_movemask_epi8(packedSampleCoverage);
- if(!bForcedSampleCount)
+ if(!T::bForcedSampleCount)
{
// input coverage has to be anded with sample mask if MSAA isn't forced on
inputMask[i] &= sampleMask;
@@ -201,10 +201,22 @@ INLINE void generateInputCoverage(const uint64_t *const coverageMask, uint32_t (
}
}
-template<SWR_MULTISAMPLE_COUNT sampleCountT, bool bIsStandardPattern, bool bForcedSampleCount>
+template<typename T>
INLINE void generateInputCoverage(const uint64_t *const coverageMask, __m256 &inputCoverage, const uint32_t sampleMask)
{
uint32_t inputMask[KNOB_SIMD_WIDTH];
- generateInputCoverage<sampleCountT, bIsStandardPattern, bForcedSampleCount>(coverageMask, inputMask, sampleMask);
+ generateInputCoverage<T>(coverageMask, inputMask, sampleMask);
inputCoverage = _simd_castsi_ps(_mm256_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0]));
}
+
+template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
+ uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t odepth = 0>
+struct SwrBackendTraits
+{
+ static const bool bIsStandardPattern = (samplePattern == SWR_MSAA_STANDARD_PATTERN);
+ static const bool bInputCoverage = (coverage == 1);
+ static const bool bCentroidPos = (centroid == 1);
+ static const bool bForcedSampleCount = (forced == 1);
+ static const bool bWritesODepth = (odepth == 1);
+ typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, (bIsStandardPattern) ? SWR_MSAA_STANDARD_PATTERN : SWR_MSAA_CENTER_PATTERN> MultisampleT;
+}; \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/multisample.cpp b/src/gallium/drivers/swr/rasterizer/core/multisample.cpp
index d51a546b063..643ba133808 100644
--- a/src/gallium/drivers/swr/rasterizer/core/multisample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/multisample.cpp
@@ -49,3 +49,16 @@ const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosX[16]
{0.5625, 0.4375, 0.3125, 0.7500, 0.1875, 0.6250, 0.8125, 0.6875, 0.3750, 0.5000, 0.2500, 0.1250, 0.0000, 0.9375, 0.8750, 0.0625};
const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosY[16]
{0.5625, 0.3125, 0.6250, 0.4375, 0.3750, 0.8125, 0.6875, 0.1875, 0.8750, 0.0625, 0.1250, 0.7500, 0.5000, 0.2500, 0.9375, 0.0000};
+
+const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosX{ 0.5f };
+const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosY{ 0.5f };
+const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosX[2]{ 0.5f, 0.5f};
+const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosY[2]{ 0.5f, 0.5f};
+const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosX[4]{ 0.5f, 0.5f, 0.5f, 0.5f};
+const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosY[4]{ 0.5f, 0.5f, 0.5f, 0.5f };
+const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosX[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
+const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosY[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
+const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosX[16]
+{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
+const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosY[16]
+{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f }; \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/multisample.h b/src/gallium/drivers/swr/rasterizer/core/multisample.h
index 4ae777e2fc5..c5096ed31c7 100644
--- a/src/gallium/drivers/swr/rasterizer/core/multisample.h
+++ b/src/gallium/drivers/swr/rasterizer/core/multisample.h
@@ -54,7 +54,7 @@ SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples)
// hardcoded offsets based on Direct3d standard multisample positions
// 8 x 8 pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
// coords are 0.8 fixed point offsets from (0, 0)
-template<SWR_MULTISAMPLE_COUNT sampleCount>
+template<SWR_MULTISAMPLE_COUNT sampleCount, SWR_MSAA_SAMPLE_PATTERN samplePattern = SWR_MSAA_STANDARD_PATTERN>
struct MultisampleTraits
{
INLINE static __m128i vXi(uint32_t sampleNum) = delete;
@@ -74,7 +74,7 @@ struct MultisampleTraits
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_1X>
+struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
@@ -143,10 +143,74 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X>
static const float samplePosX;
static const float samplePosY;
static const uint32_t numSamples = 1;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
+ static const uint32_t numCoverageSamples = 1;
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_2X>
+struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
+{
+ INLINE static __m128i vXi(uint32_t sampleNum)
+ {
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static __m128i vYi(uint32_t sampleNum)
+ {
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static simdscalar vX(uint32_t sampleNum)
+ {
+ return _simd_set1_ps(0.5f);
+ }
+
+ INLINE static simdscalar vY(uint32_t sampleNum)
+ {
+ return _simd_set1_ps(0.5f);
+ }
+
+ INLINE static float X(uint32_t sampleNum) {return 0.5f;};
+ INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+
+ INLINE static __m128i TileSampleOffsetsX()
+ {
+ // BR, BL, UR, UL
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static __m128i TileSampleOffsetsY()
+ {
+ // BR, BL, UR, UL
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
+ {
+ return 0;
+ }
+
+ INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
+ {
+ return 0;
+ }
+
+ INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
+ {
+ return 0;
+ }
+
+ INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
+
+ static const uint32_t numSamples = 1;
+ static const float samplePosX;
+ static const float samplePosY;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
+ static const uint32_t numCoverageSamples = 1;
+};
+
+template<>
+struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
@@ -238,10 +302,92 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X>
static const float samplePosX[2];
static const float samplePosY[2];
static const uint32_t numSamples = 2;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
+ static const uint32_t numCoverageSamples = 2;
+};
+
+template<>
+struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
+{
+ INLINE static __m128i vXi(uint32_t sampleNum)
+ {
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static __m128i vYi(uint32_t sampleNum)
+ {
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static simdscalar vX(uint32_t sampleNum)
+ {
+ return _simd_set1_ps(0.5f);
+ }
+
+ INLINE static simdscalar vY(uint32_t sampleNum)
+ {
+ return _simd_set1_ps(0.5f);
+ }
+
+ INLINE static float X(uint32_t sampleNum) {return 0.5f;};
+ INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+
+ INLINE static __m128i TileSampleOffsetsX()
+ {
+ // BR, BL, UR, UL
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static __m128i TileSampleOffsetsY()
+ {
+ // BR, BL, UR, UL
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileColorOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8)
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileColorOffsets[sampleNum];
+ }
+
+ INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileDepthOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8)
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileDepthOffsets[sampleNum];
+ }
+
+ INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileStencilOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8)
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileStencilOffsets[sampleNum];
+ }
+
+ INLINE static simdscalari FullSampleMask()
+ {
+ static const simdscalari mask =_simd_set1_epi32(0x3);
+ return mask;
+ }
+ static const uint32_t numSamples = 2;
+ static const float samplePosX[2];
+ static const float samplePosY[2];
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
+ static const uint32_t numCoverageSamples = 1;
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_4X>
+struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
@@ -343,10 +489,98 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X>
static const float samplePosX[4];
static const float samplePosY[4];
static const uint32_t numSamples = 4;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
+ static const uint32_t numCoverageSamples = 4;
+};
+
+template<>
+struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
+{
+ INLINE static __m128i vXi(uint32_t sampleNum)
+ {
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static __m128i vYi(uint32_t sampleNum)
+ {
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static simdscalar vX(uint32_t sampleNum)
+ {
+ return _simd_set1_ps(0.5f);
+ }
+
+ INLINE static simdscalar vY(uint32_t sampleNum)
+ {
+ return _simd_set1_ps(0.5f);
+ }
+
+ INLINE static float X(uint32_t sampleNum) {return 0.5f;};
+ INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+
+ INLINE static __m128i TileSampleOffsetsX()
+ {
+ // BR, BL, UR, UL
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static __m128i TileSampleOffsetsY()
+ {
+ // BR, BL, UR, UL
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileColorOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileColorOffsets[sampleNum];
+ }
+
+ INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileDepthOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileDepthOffsets[sampleNum];
+ }
+
+ INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileStencilOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileStencilOffsets[sampleNum];
+ }
+
+ INLINE static simdscalari FullSampleMask()
+ {
+ static const simdscalari mask = _simd_set1_epi32(0xF);
+ return mask;
+ }
+ static const uint32_t numSamples = 4;
+ static const float samplePosX[4];
+ static const float samplePosY[4];
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
+ static const uint32_t numCoverageSamples = 1;
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_8X>
+struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
@@ -464,10 +698,110 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X>
static const float samplePosX[8];
static const float samplePosY[8];
static const uint32_t numSamples = 8;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
+ static const uint32_t numCoverageSamples = 8;
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_16X>
+struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
+{
+ INLINE static __m128i vXi(uint32_t sampleNum)
+ {
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static __m128i vYi(uint32_t sampleNum)
+ {
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static simdscalar vX(uint32_t sampleNum)
+ {
+ return _simd_set1_ps(0.5f);
+ }
+
+ INLINE static simdscalar vY(uint32_t sampleNum)
+ {
+ return _simd_set1_ps(0.5f);
+ }
+
+ INLINE static float X(uint32_t sampleNum) {return 0.5f;};
+ INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+
+ INLINE static __m128i TileSampleOffsetsX()
+ {
+ // BR, BL, UR, UL
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static __m128i TileSampleOffsetsY()
+ {
+ // BR, BL, UR, UL
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileColorOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileColorOffsets[sampleNum];
+ }
+
+ INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileDepthOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileDepthOffsets[sampleNum];
+ }
+
+ INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileStencilOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 4,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 5,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 6,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 7,
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileStencilOffsets[sampleNum];
+ }
+
+ INLINE static simdscalari FullSampleMask()
+ {
+ static const simdscalari mask = _simd_set1_epi32(0xFF);
+ return mask;
+ }
+ static const uint32_t numSamples = 8;
+ static const float samplePosX[8];
+ static const float samplePosY[8];
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
+ static const uint32_t numCoverageSamples = 1;
+};
+
+template<>
+struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
{
INLINE static __m128i vXi(uint32_t sampleNum)
{
@@ -617,4 +951,128 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X>
static const float samplePosX[16];
static const float samplePosY[16];
static const uint32_t numSamples = 16;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
+ static const uint32_t numCoverageSamples = 16;
+};
+
+template<>
+struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
+{
+ INLINE static __m128i vXi(uint32_t sampleNum)
+ {
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static __m128i vYi(uint32_t sampleNum)
+ {
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static simdscalar vX(uint32_t sampleNum)
+ {
+ return _simd_set1_ps(0.5f);
+ }
+
+ INLINE static simdscalar vY(uint32_t sampleNum)
+ {
+ return _simd_set1_ps(0.5f);
+ }
+
+ INLINE static float X(uint32_t sampleNum) {return 0.5f;};
+ INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+
+ INLINE static __m128i TileSampleOffsetsX()
+ {
+ // BR, BL, UR, UL
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static __m128i TileSampleOffsetsY()
+ {
+ // BR, BL, UR, UL
+ return _mm_set1_epi32(0x80);
+ }
+
+ INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileColorOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 8,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 9,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 10,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 11,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 12,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 13,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 14,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 15,
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileColorOffsets[sampleNum];
+ }
+
+ INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileDepthOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 8,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 9,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 10,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 11,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 12,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 13,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 14,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 15,
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileDepthOffsets[sampleNum];
+ }
+
+ INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
+ {
+ static const uint32_t RasterTileStencilOffsets[numSamples]
+ { 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 4,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 5,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 6,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 7,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 8,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 9,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 10,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 11,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 12,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 13,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 14,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 15,
+ };
+ assert(sampleNum < numSamples);
+ return RasterTileStencilOffsets[sampleNum];
+ }
+
+ INLINE static simdscalari FullSampleMask()
+ {
+ static const simdscalari mask = _simd_set1_epi32(0xFFFF);
+ return mask;
+ }
+ static const uint32_t numSamples = 16;
+ static const float samplePosX[16];
+ static const float samplePosY[16];
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
+ static const uint32_t numCoverageSamples = 1;
};