summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/swr/rasterizer
diff options
context:
space:
mode:
authorTim Rowley <[email protected]>2017-03-28 15:32:04 -0500
committerTim Rowley <[email protected]>2017-04-05 18:19:25 -0500
commit117fc582f86564b4c37db248b3231b8d86da0039 (patch)
tree4a2160554f25ac2e966880be2273203a92b687bd /src/gallium/drivers/swr/rasterizer
parent3c52a7316a1b968a780f387b70a17183d5bc9054 (diff)
swr: [rasterizer core] Programmable sample position support
Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer')
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py22
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdintrin.h7
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp8
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.cpp43
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.h141
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/binner.cpp12
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/multisample.cpp13
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/multisample.h500
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp14
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer.h3
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/state.h98
11 files changed, 267 insertions, 594 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py
index 1e9593a1af1..4cabde3394f 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py
@@ -60,6 +60,8 @@ def gen_llvm_type(type, name, is_pointer, is_pointer_pointer, is_array, is_array
llvm_type = 'VectorType::get(Type::getFloatTy(ctx), pJitMgr->mVWidth)'
elif type == 'simdscalari':
llvm_type = 'VectorType::get(Type::getInt32Ty(ctx), pJitMgr->mVWidth)'
+ elif type == '__m128i':
+ llvm_type = 'VectorType::get(Type::getInt32Ty(ctx), 4)'
elif type == 'SIMD8::vector_t':
llvm_type = 'VectorType::get(Type::getFloatTy(ctx), 8)'
elif type == 'SIMD8::vectori_t':
@@ -146,6 +148,26 @@ def gen_llvm_types(input_file, output_file):
is_llvm_struct = False
###########################################
+ # Is field the start of a function? Tells script to ignore it
+ is_llvm_func_start = re.search(r'@llvm_func_start', line)
+
+ if is_llvm_func_start is not None:
+ while not end_of_struct and idx < len(lines)-1:
+ idx += 1
+ line = lines[idx].rstrip()
+ is_llvm_func_end = re.search(r'@llvm_func_end', line)
+ if is_llvm_func_end is not None:
+ break;
+ continue
+
+ ###########################################
+ # Is field a function? Tells script to ignore it
+ is_llvm_func = re.search(r'@llvm_func', line)
+
+ if is_llvm_func is not None:
+ continue
+
+ ###########################################
# Is field a llvm enum? Tells script to treat type as an enum and replaced with uint32 type.
is_llvm_enum = re.search(r'@llvm_enum', line)
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index 3cf3b180200..1e3f14ce59a 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -648,6 +648,13 @@ simdscalari _simd_blendv_epi32(simdscalari a, simdscalari b, simdscalari mask)
return _simd_castps_si(_simd_blendv_ps(_simd_castsi_ps(a), _simd_castsi_ps(b), _simd_castsi_ps(mask)));
}
+template<int mask>
+INLINE
+__m128i _simd_blend4_epi32(__m128i a, __m128i b)
+{
+ return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), mask));
+}
+
// convert bitmask to vector mask
INLINE
simdscalar vMask(int32_t mask)
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index dabd0616d3b..1710cc66793 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -793,7 +793,6 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
const SWR_RASTSTATE &rastState = pState->state.rastState;
const SWR_PS_STATE &psState = pState->state.psState;
BACKEND_FUNCS& backendFuncs = pState->backendFuncs;
- const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
// setup backend
if (psState.pfnPixelShader == nullptr)
@@ -802,7 +801,8 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
}
else
{
- const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
+ const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
+ const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0;
const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
@@ -815,7 +815,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
{
// always need to generate I & J per sample for Z interpolation
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
- backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage]
+ backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern][psState.inputCoverage]
[centroid][forcedSampleCount][canEarlyZ]
;
}
@@ -827,7 +827,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
}
break;
case SWR_SHADING_RATE_SAMPLE:
- SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN);
+ SWR_ASSERT(rastState.bIsCenterPattern != true);
// always need to generate I & J per sample for Z interpolation
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 84414d8e721..b76b36fcbcb 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -468,7 +468,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
SWR_PS_CONTEXT psContext;
- SetupPixelShaderContext<T>(&psContext, work);
+ const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
+ SetupPixelShaderContext<T>(&psContext, samplePos, work);
AR_END(BESetup, 1);
@@ -517,7 +518,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
CalcPixelBarycentrics(coeffs, psContext);
- CalcCentroid<T, true>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
+ CalcCentroid<T, true>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
@@ -663,7 +664,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
SWR_PS_CONTEXT psContext;
- SetupPixelShaderContext<T>(&psContext, work);
+ const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
+ SetupPixelShaderContext<T>(&psContext, samplePos, work);
AR_END(BESetup, 0);
@@ -696,7 +698,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
CalcPixelBarycentrics(coeffs, psContext);
- CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
+ CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
AR_END(BEBarycentric, 0);
@@ -725,8 +727,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
- psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
+ psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
+ psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
CalcSampleBarycentrics(coeffs, psContext);
@@ -870,7 +872,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
AR_BEGIN(BENullBackend, pDC->drawId);
///@todo: handle center multisample pattern
- typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
+ typedef SwrBackendTraits<sampleCountT, false> T;
AR_BEGIN(BESetup, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
@@ -889,7 +891,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
-
+ const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
simdscalar vXSamplePosUL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
@@ -928,8 +930,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
- psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
+ psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample));
+ psContext.vY.sample = _simd_add_ps(vYSamplePosUL, samplePos.vY(sample));
CalcSampleBarycentrics(coeffs, psContext);
@@ -995,7 +997,7 @@ PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT]
[2] // canEarlyZ
= {};
PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
- [SWR_MSAA_SAMPLE_PATTERN_COUNT]
+ [2] // isCenterPattern
[SWR_INPUT_COVERAGE_COUNT]
[2] // centroid
[2] // forcedSampleCount
@@ -1029,21 +1031,6 @@ struct BEChooser
// Recursively parse args
template <typename... TArgsT>
- static PFN_BACKEND_FUNC GetFunc(SWR_MSAA_SAMPLE_PATTERN tArg, TArgsT... remainingArgs)
- {
- switch(tArg)
- {
- case SWR_MSAA_CENTER_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_CENTER_PATTERN>::GetFunc(remainingArgs...); break;
- case SWR_MSAA_STANDARD_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...); break;
- default:
- SWR_ASSERT(0 && "Invalid sample pattern\n");
- return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...);
- break;
- }
- }
-
- // Recursively parse args
- template <typename... TArgsT>
static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
{
switch(tArg)
@@ -1098,7 +1085,7 @@ void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COU
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
{
table[inputCoverage][isCentroid][canEarlyZ] =
- BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, (SWR_INPUT_COVERAGE)inputCoverage,
+ BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, false, (SWR_INPUT_COVERAGE)inputCoverage,
(isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
}
}
@@ -1116,7 +1103,7 @@ void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_C
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
{
table[sampleCount][inputCoverage][centroid][canEarlyZ] =
- BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (SWR_INPUT_COVERAGE)inputCoverage,
+ BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, false, (SWR_INPUT_COVERAGE)inputCoverage,
(centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index f022990bf26..82765c2e877 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -48,7 +48,7 @@ void InitCPSFuncTables();
void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext);
extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
- [SWR_MSAA_SAMPLE_PATTERN_COUNT]
+ [2] // isCenterPattern
[SWR_INPUT_COVERAGE_COUNT]
[2] // centroid
[2] // forcedSampleCount
@@ -153,66 +153,67 @@ struct generateInputCoverage
__m256i mask[2];
__m256i sampleCoverage[2];
- if(T::bIsStandardPattern)
+
+ if(T::bIsCenterPattern)
{
- __m256i src = _mm256_set1_epi32(0);
- __m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
-
+ // center coverage is the same for all samples; just broadcast to the sample slots
+ uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
if(T::MultisampleT::numSamples == 1)
{
- mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
+ sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
}
else if(T::MultisampleT::numSamples == 2)
{
- mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
+ sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
}
else if(T::MultisampleT::numSamples == 4)
{
- mask[0] = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
+ sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
}
else if(T::MultisampleT::numSamples == 8)
{
- mask[0] = _mm256_set1_epi32(-1);
+ sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
}
else if(T::MultisampleT::numSamples == 16)
{
- mask[0] = _mm256_set1_epi32(-1);
- mask[1] = _mm256_set1_epi32(-1);
- index1 = _mm256_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
- }
-
- // gather coverage for samples 0-7
- sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8));
- if(T::MultisampleT::numSamples > 8)
- {
- // gather coverage for samples 8-15
- sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
+ sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
+ sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
}
}
else
{
- // center coverage is the same for all samples; just broadcast to the sample slots
- uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
+ __m256i src = _mm256_set1_epi32(0);
+ __m256i index0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
+
if(T::MultisampleT::numSamples == 1)
{
- sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
+ mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
}
else if(T::MultisampleT::numSamples == 2)
{
- sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
+ mask[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
}
else if(T::MultisampleT::numSamples == 4)
{
- sampleCoverage[0] = _mm256_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
+ mask[0] = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
}
else if(T::MultisampleT::numSamples == 8)
{
- sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
+ mask[0] = _mm256_set1_epi32(-1);
}
else if(T::MultisampleT::numSamples == 16)
{
- sampleCoverage[0] = _mm256_set1_epi32(centerCoverage);
- sampleCoverage[1] = _mm256_set1_epi32(centerCoverage);
+ mask[0] = _mm256_set1_epi32(-1);
+ mask[1] = _mm256_set1_epi32(-1);
+ index1 = _mm256_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
+ }
+
+ // gather coverage for samples 0-7
+ sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8));
+ if(T::MultisampleT::numSamples > 8)
+ {
+ // gather coverage for samples 8-15
+ sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
}
}
@@ -332,7 +333,8 @@ struct generateInputCoverage<T, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
// SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<typename T>
-INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const coverageMask, const uint32_t sampleMask,
+INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS& samplePos,
+ const uint64_t *const coverageMask, const uint32_t sampleMask,
const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
{
uint32_t inputMask[KNOB_SIMD_WIDTH];
@@ -352,23 +354,23 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
(inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
// look up and set the sample offsets from UL pixel corner for first covered sample
- __m256 vXSample = _mm256_set_ps(T::MultisampleT::X(sampleNum[7]),
- T::MultisampleT::X(sampleNum[6]),
- T::MultisampleT::X(sampleNum[5]),
- T::MultisampleT::X(sampleNum[4]),
- T::MultisampleT::X(sampleNum[3]),
- T::MultisampleT::X(sampleNum[2]),
- T::MultisampleT::X(sampleNum[1]),
- T::MultisampleT::X(sampleNum[0]));
-
- __m256 vYSample = _mm256_set_ps(T::MultisampleT::Y(sampleNum[7]),
- T::MultisampleT::Y(sampleNum[6]),
- T::MultisampleT::Y(sampleNum[5]),
- T::MultisampleT::Y(sampleNum[4]),
- T::MultisampleT::Y(sampleNum[3]),
- T::MultisampleT::Y(sampleNum[2]),
- T::MultisampleT::Y(sampleNum[1]),
- T::MultisampleT::Y(sampleNum[0]));
+ __m256 vXSample = _mm256_set_ps(samplePos.X(sampleNum[7]),
+ samplePos.X(sampleNum[6]),
+ samplePos.X(sampleNum[5]),
+ samplePos.X(sampleNum[4]),
+ samplePos.X(sampleNum[3]),
+ samplePos.X(sampleNum[2]),
+ samplePos.X(sampleNum[1]),
+ samplePos.X(sampleNum[0]));
+
+ __m256 vYSample = _mm256_set_ps(samplePos.Y(sampleNum[7]),
+ samplePos.Y(sampleNum[6]),
+ samplePos.Y(sampleNum[5]),
+ samplePos.Y(sampleNum[4]),
+ samplePos.Y(sampleNum[3]),
+ samplePos.Y(sampleNum[2]),
+ samplePos.Y(sampleNum[1]),
+ samplePos.Y(sampleNum[0]));
// add sample offset to UL pixel corner
vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
@@ -398,8 +400,8 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const cov
__m256i vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
- vXSample = _simd_set1_ps(T::MultisampleT::X(firstCoveredSampleMaskSample));
- vYSample = _simd_set1_ps(T::MultisampleT::Y(firstCoveredSampleMaskSample));
+ vXSample = _simd_set1_ps(samplePos.X(firstCoveredSampleMaskSample));
+ vYSample = _simd_set1_ps(samplePos.Y(firstCoveredSampleMaskSample));
// blend in case 3a pixel locations
psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
@@ -494,7 +496,7 @@ inline void SetupRenderBuffers(uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], uin
}
template<typename T>
-void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_TRIANGLE_DESC &work)
+void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos, SWR_TRIANGLE_DESC &work)
{
psContext->pAttribs = work.pAttribs;
psContext->pPerspAttribs = work.pPerspAttribs;
@@ -507,14 +509,15 @@ void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_TRIANGLE_DESC
psContext->recipDet = work.recipDet;
psContext->pRecipW = work.pRecipW;
- psContext->pSamplePosX = reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
- psContext->pSamplePosY = reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
+ psContext->pSamplePosX = samplePos.X();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
+ psContext->pSamplePosY = samplePos.Y();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
psContext->rasterizerSampleCount = T::MultisampleT::numSamples;
psContext->sampleIndex = 0;
}
template<typename T, bool IsSingleSample>
-void CalcCentroid(SWR_PS_CONTEXT *psContext, const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
+void CalcCentroid(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos,
+ const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
{
if (IsSingleSample) // if (T::MultisampleT::numSamples == 1) // doesn't cut it, the centroid positions are still different
{
@@ -530,15 +533,15 @@ void CalcCentroid(SWR_PS_CONTEXT *psContext, const BarycentricCoeffs &coeffs, co
if (T::bCentroidPos)
{
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
- if (T::bIsStandardPattern)
+ if (T::bIsCenterPattern)
{
- // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
- CalcCentroidPos<T>(*psContext, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
+ psContext->vX.centroid = _simd_add_ps(psContext->vX.UL, _simd_set1_ps(0.5f));
+ psContext->vY.centroid = _simd_add_ps(psContext->vY.UL, _simd_set1_ps(0.5f));
}
else
{
- psContext->vX.centroid = _simd_add_ps(psContext->vX.UL, _simd_set1_ps(0.5f));
- psContext->vY.centroid = _simd_add_ps(psContext->vY.UL, _simd_set1_ps(0.5f));
+ // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
+ CalcCentroidPos<T>(*psContext, samplePos, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
}
CalcCentroidBarycentrics(coeffs, *psContext, psContext->vX.UL, psContext->vY.UL);
@@ -557,8 +560,9 @@ struct PixelRateZTestLoop
PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
uint8_t*& depthBuffer, uint8_t*& stencilBuffer, const uint8_t ClipDistanceMask) :
pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
- clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer) {};
-
+ samplePos(state.rastState.samplePositions),
+ clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer){};
+
INLINE
uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext,
const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
@@ -597,8 +601,8 @@ struct PixelRateZTestLoop
AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
- psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
+ psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
+ psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
// calc I & J per sample
CalcSampleBarycentrics(coeffs, psContext);
@@ -673,6 +677,7 @@ private:
const BarycentricCoeffs& coeffs;
const API_STATE& state;
const SWR_PS_STATE& psState;
+ const SWR_MULTISAMPLE_POS& samplePos;
const uint8_t clipDistanceMask;
uint8_t*& pDepthBuffer;
uint8_t*& pStencilBuffer;
@@ -862,7 +867,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
SetupBarycentricCoeffs(&coeffs, work);
SWR_PS_CONTEXT psContext;
- SetupPixelShaderContext<T>(&psContext, work);
+ const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
+ SetupPixelShaderContext<T>(&psContext, samplePos, work);
uint8_t *pDepthBuffer, *pStencilBuffer;
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
@@ -887,7 +893,6 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
{
#if USE_8x2_TILE_BACKEND
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
-
#endif
simdscalar activeLanes;
if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
@@ -904,7 +909,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
CalcPixelBarycentrics(coeffs, psContext);
- CalcCentroid<T, false>(&psContext, coeffs, work.coverageMask, state.blendState.sampleMask);
+ CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
AR_END(BEBarycentric, 0);
@@ -966,7 +971,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
{
AR_BEGIN(BEOutputMerger, pDC->drawId);
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
- uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
+ uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
simdscalar coverageMask, depthMask;
if(T::bForcedSampleCount)
{
@@ -1045,15 +1050,15 @@ Endtile:
AR_END(BEPixelRateBackend, 0);
}
-template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
+template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t isCenter = 0,
uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0
>
struct SwrBackendTraits
{
- static const bool bIsStandardPattern = (samplePattern == SWR_MSAA_STANDARD_PATTERN);
+ static const bool bIsCenterPattern = (isCenter == 1);
static const uint32_t InputCoverage = coverage;
static const bool bCentroidPos = (centroid == 1);
static const bool bForcedSampleCount = (forced == 1);
static const bool bCanEarlyZ = (canEarlyZ == 1);
- typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, (bIsStandardPattern) ? SWR_MSAA_STANDARD_PATTERN : SWR_MSAA_CENTER_PATTERN> MultisampleT;
+ typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, bIsCenterPattern> MultisampleT;
};
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index 3d42718a374..f00701f8192 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -640,9 +640,8 @@ void BinTriangles(
else
{
// degenerate triangles won't be sent to rasterizer; just enable all edges
- pfnWork = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN),
- (rastState.conservativeRast > 0), (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID,
- (state.scissorsTileAligned == false));
+ pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
+ (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, ALL_EDGES_VALID, (state.scissorsTileAligned == false));
}
if (!triMask)
@@ -658,7 +657,7 @@ void BinTriangles(
// only discard for non-MSAA case and when conservative rast is disabled
// (xmin + 127) & ~255
// (xmax + 128) & ~255
- if((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.samplePattern == SWR_MSAA_CENTER_PATTERN) &&
+ if((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.bIsCenterPattern) &&
(!CT::IsConservativeT::value))
{
origTriMask = triMask;
@@ -787,9 +786,8 @@ endBinTriangles:
{
// only rasterize valid edges if we have a degenerate primitive
int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
- work.pfnWork = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN),
- (rastState.conservativeRast > 0), (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable,
- (state.scissorsTileAligned == false));
+ work.pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
+ (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, triEdgeEnable, (state.scissorsTileAligned == false));
// Degenerate triangles are required to be constant interpolated
isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;
diff --git a/src/gallium/drivers/swr/rasterizer/core/multisample.cpp b/src/gallium/drivers/swr/rasterizer/core/multisample.cpp
index 94992e30765..88a0ef76144 100644
--- a/src/gallium/drivers/swr/rasterizer/core/multisample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/multisample.cpp
@@ -50,16 +50,3 @@ const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosX[16]
{0.5625, 0.4375, 0.3125, 0.7500, 0.1875, 0.6250, 0.8125, 0.6875, 0.3750, 0.5000, 0.2500, 0.1250, 0.0000, 0.9375, 0.8750, 0.0625};
const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosY[16]
{0.5625, 0.3125, 0.6250, 0.4375, 0.3750, 0.8125, 0.6875, 0.1875, 0.8750, 0.0625, 0.1250, 0.7500, 0.5000, 0.2500, 0.9375, 0.0000};
-
-const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosX{ 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>::samplePosY{ 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosX[2]{ 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>::samplePosY[2]{ 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosX[4]{ 0.5f, 0.5f, 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>::samplePosY[4]{ 0.5f, 0.5f, 0.5f, 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosX[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>::samplePosY[8]{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
-const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosX[16]
-{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
-const float MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>::samplePosY[16]
-{ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
diff --git a/src/gallium/drivers/swr/rasterizer/core/multisample.h b/src/gallium/drivers/swr/rasterizer/core/multisample.h
index 55387a2ec6e..dc2dde9e2b0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/multisample.h
+++ b/src/gallium/drivers/swr/rasterizer/core/multisample.h
@@ -58,70 +58,21 @@ SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples)
// hardcoded offsets based on Direct3d standard multisample positions
// 8 x 8 pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
// coords are 0.8 fixed point offsets from (0, 0)
-template<SWR_MULTISAMPLE_COUNT sampleCount, SWR_MSAA_SAMPLE_PATTERN samplePattern = SWR_MSAA_STANDARD_PATTERN>
+template<SWR_MULTISAMPLE_COUNT sampleCount, bool isCenter = false>
struct MultisampleTraits
{
- INLINE static __m128i vXi(uint32_t sampleNum) = delete;
- INLINE static __m128i vYi(uint32_t sampleNum) = delete;
- INLINE static simdscalar vX(uint32_t sampleNum) = delete;
- INLINE static simdscalar vY(uint32_t sampleNum) = delete;
INLINE static float X(uint32_t sampleNum) = delete;
INLINE static float Y(uint32_t sampleNum) = delete;
- INLINE static __m128i TileSampleOffsetsX() = delete;
- INLINE static __m128i TileSampleOffsetsY() = delete;
INLINE static simdscalari FullSampleMask() = delete;
static const uint32_t numSamples = 0;
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_1X, false>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- static const __m128i X = _mm_set1_epi32(samplePosXi);
- return X;
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- static const __m128i Y = _mm_set1_epi32(samplePosYi);
- return Y;
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- static const simdscalar X = _simd_set1_ps(0.5f);
- return X;
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- static const simdscalar Y = _simd_set1_ps(0.5f);
- return Y;
- }
-
INLINE static float X(uint32_t sampleNum) {return samplePosX;};
INLINE static float Y(uint32_t sampleNum) {return samplePosY;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- static const uint32_t bboxLeftEdge = 0x80;
- static const uint32_t bboxRightEdge = 0x80;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
- return tileSampleOffsetX;
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- static const uint32_t bboxTopEdge = 0x80;
- static const uint32_t bboxBottomEdge = 0x80;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
- return tileSampleOffsetY;
- }
-
INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
static const uint32_t samplePosXi;
@@ -134,43 +85,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN>
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_1X, true>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
static const uint32_t numSamples = 1;
@@ -181,57 +99,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_1X, SWR_MSAA_CENTER_PATTERN>
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_2X, false>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- static const __m128i X[numSamples] {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1])};
- return X[sampleNum];
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- static const __m128i Y[numSamples] {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1])};
- return Y[sampleNum];
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- static const simdscalar X[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
- assert(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- static const simdscalar Y[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
- assert(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- static const uint32_t bboxLeftEdge = 0x40;
- static const uint32_t bboxRightEdge = 0xC0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
- return tileSampleOffsetX;
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- static const uint32_t bboxTopEdge = 0x40;
- static const uint32_t bboxBottomEdge = 0xC0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
- return tileSampleOffsetY;
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask =_simd_set1_epi32(0x3);
@@ -248,43 +119,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_STANDARD_PATTERN>
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_2X, true>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask =_simd_set1_epi32(0x3);
@@ -298,61 +136,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_2X, SWR_MSAA_CENTER_PATTERN>
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_4X, false>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- static const __m128i X[numSamples]
- {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3])};
- SWR_ASSERT(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- static const __m128i Y[numSamples]
- {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3])};
- SWR_ASSERT(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- static const simdscalar X[numSamples]
- {_simd_set1_ps(0.375f), _simd_set1_ps(0.875), _simd_set1_ps(0.125), _simd_set1_ps(0.625)};
- assert(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- static const simdscalar Y[numSamples]
- {_simd_set1_ps(0.125), _simd_set1_ps(0.375f), _simd_set1_ps(0.625), _simd_set1_ps(0.875)};
- assert(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- static const uint32_t bboxLeftEdge = 0x20;
- static const uint32_t bboxRightEdge = 0xE0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
- return tileSampleOffsetX;
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- static const uint32_t bboxTopEdge = 0x20;
- static const uint32_t bboxBottomEdge = 0xE0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
- return tileSampleOffsetY;
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xF);
@@ -369,48 +156,16 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_STANDARD_PATTERN>
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_4X, true>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xF);
return mask;
}
+
static const uint32_t numSamples = 4;
static const float samplePosX[4];
static const float samplePosY[4];
@@ -419,65 +174,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_4X, SWR_MSAA_CENTER_PATTERN>
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_8X, false>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- static const __m128i X[numSamples]
- {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]),
- _mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7])};
- SWR_ASSERT(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- static const __m128i Y[numSamples]
- {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]),
- _mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7])};
- SWR_ASSERT(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- static const simdscalar X[numSamples]
- {_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.8125), _simd_set1_ps(0.3125),
- _simd_set1_ps(0.1875), _simd_set1_ps(0.0625), _simd_set1_ps(0.6875), _simd_set1_ps(0.9375)};
- assert(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- static const simdscalar Y[numSamples]
- {_simd_set1_ps(0.3125), _simd_set1_ps(0.6875), _simd_set1_ps(0.5625), _simd_set1_ps(0.1875),
- _simd_set1_ps(0.8125), _simd_set1_ps(0.4375), _simd_set1_ps(0.9375), _simd_set1_ps(0.0625)};
- assert(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- static const uint32_t bboxLeftEdge = 0x10;
- static const uint32_t bboxRightEdge = 0xF0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
- return tileSampleOffsetX;
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- static const uint32_t bboxTopEdge = 0x10;
- static const uint32_t bboxBottomEdge = 0xF0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
- return tileSampleOffsetY;
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFF);
@@ -494,43 +194,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_STANDARD_PATTERN>
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_8X, true>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFF);
@@ -544,73 +211,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_8X, SWR_MSAA_CENTER_PATTERN>
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_16X, false>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- static const __m128i X[numSamples]
- {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]),
- _mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7]),
- _mm_set1_epi32(samplePosXi[8]), _mm_set1_epi32(samplePosXi[9]), _mm_set1_epi32(samplePosXi[10]), _mm_set1_epi32(samplePosXi[11]),
- _mm_set1_epi32(samplePosXi[12]), _mm_set1_epi32(samplePosXi[13]), _mm_set1_epi32(samplePosXi[14]), _mm_set1_epi32(samplePosXi[15])};
- SWR_ASSERT(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- static const __m128i Y[numSamples]
- {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]),
- _mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7]),
- _mm_set1_epi32(samplePosYi[8]), _mm_set1_epi32(samplePosYi[9]), _mm_set1_epi32(samplePosYi[10]), _mm_set1_epi32(samplePosYi[11]),
- _mm_set1_epi32(samplePosYi[12]), _mm_set1_epi32(samplePosYi[13]), _mm_set1_epi32(samplePosYi[14]), _mm_set1_epi32(samplePosYi[15])};
- SWR_ASSERT(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- static const simdscalar X[numSamples]
- {_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.3125), _simd_set1_ps(0.7500),
- _simd_set1_ps(0.1875), _simd_set1_ps(0.6250), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875),
- _simd_set1_ps(0.3750), _simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.1250),
- _simd_set1_ps(0.0000), _simd_set1_ps(0.9375), _simd_set1_ps(0.8750), _simd_set1_ps(0.0625)};
- assert(sampleNum < numSamples);
- return X[sampleNum];
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- static const simdscalar Y[numSamples]
- {_simd_set1_ps(0.5625), _simd_set1_ps(0.3125), _simd_set1_ps(0.6250), _simd_set1_ps(0.4375),
- _simd_set1_ps(0.3750), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875), _simd_set1_ps(0.1875),
- _simd_set1_ps(0.8750), _simd_set1_ps(0.0625), _simd_set1_ps(0.1250), _simd_set1_ps(0.7500),
- _simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.9375), _simd_set1_ps(0.0000)};
- assert(sampleNum < numSamples);
- return Y[sampleNum];
- }
-
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- static const uint32_t bboxLeftEdge = 0x00;
- static const uint32_t bboxRightEdge = 0xF0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
- return tileSampleOffsetX;
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- static const uint32_t bboxTopEdge = 0x00;
- static const uint32_t bboxBottomEdge = 0xF0;
- // BR, BL, UR, UL
- static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
- return tileSampleOffsetY;
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
@@ -627,43 +231,10 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_STANDARD_PATTERN>
};
template<>
-struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
+struct MultisampleTraits<SWR_MULTISAMPLE_16X, true>
{
- INLINE static __m128i vXi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i vYi(uint32_t sampleNum)
- {
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static simdscalar vX(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
- INLINE static simdscalar vY(uint32_t sampleNum)
- {
- return _simd_set1_ps(0.5f);
- }
-
INLINE static float X(uint32_t sampleNum) {return 0.5f;};
INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-
- INLINE static __m128i TileSampleOffsetsX()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
- INLINE static __m128i TileSampleOffsetsY()
- {
- // BR, BL, UR, UL
- return _mm_set1_epi32(0x80);
- }
-
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
@@ -675,3 +246,50 @@ struct MultisampleTraits<SWR_MULTISAMPLE_16X, SWR_MSAA_CENTER_PATTERN>
static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
static const uint32_t numCoverageSamples = 1;
};
+
+INLINE
+bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount, const SWR_MULTISAMPLE_POS& samplePos)
+{
+ // detect if we're using standard or center sample patterns
+ const uint32_t *standardPosX, *standardPosY;
+ switch(sampleCount)
+ {
+ case SWR_MULTISAMPLE_1X:
+ standardPosX = &MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosXi;
+ standardPosY = &MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosYi;
+ break;
+ case SWR_MULTISAMPLE_2X:
+ standardPosX = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosXi;
+ standardPosY = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosYi;
+ break;
+ case SWR_MULTISAMPLE_4X:
+ standardPosX = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosXi;
+ standardPosY = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosYi;
+ break;
+ case SWR_MULTISAMPLE_8X:
+ standardPosX = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosXi;
+ standardPosY = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosYi;
+ break;
+ case SWR_MULTISAMPLE_16X:
+ standardPosX = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosXi;
+ standardPosY = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosYi;
+ break;
+ default:
+ break;
+ }
+
+ // scan sample pattern for standard or center
+ uint32_t numSamples = GetNumSamples(sampleCount);
+ bool bIsStandard = true;
+ if(numSamples > 1)
+ {
+ for(uint32_t i = 0; i < numSamples; i++)
+ {
+ bIsStandard = (standardPosX[i] == samplePos.Xi(i)) ||
+ (standardPosY[i] == samplePos.Yi(i));
+ if(!bIsStandard)
+ break;
+ }
+ }
+ return !bIsStandard;
+} \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
index d0fdf4882ff..0837841746e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
@@ -1118,8 +1118,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
__m256d vEdgeTileBbox[3];
if (NumCoverageSamplesT::value > 1)
{
- __m128i vTileSampleBBoxXh = RT::MT::TileSampleOffsetsX();
- __m128i vTileSampleBBoxYh = RT::MT::TileSampleOffsetsY();
+ const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
+ const __m128i vTileSampleBBoxXh = samplePos.TileSampleOffsetsX();
+ const __m128i vTileSampleBBoxYh = samplePos.TileSampleOffsetsY();
__m256d vTileSampleBBoxXFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxXh);
__m256d vTileSampleBBoxYFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxYh);
@@ -1206,8 +1207,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
}
else
{
- __m128i vSampleOffsetXh = RT::MT::vXi(sampleNum);
- __m128i vSampleOffsetYh = RT::MT::vYi(sampleNum);
+ const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
+ __m128i vSampleOffsetXh = samplePos.vXi(sampleNum);
+ __m128i vSampleOffsetYh = samplePos.vYi(sampleNum);
__m256d vSampleOffsetX = _mm256_cvtepi32_pd(vSampleOffsetXh);
__m256d vSampleOffsetY = _mm256_cvtepi32_pd(vSampleOffsetYh);
@@ -1340,7 +1342,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
// setup triangle rasterizer function
PFN_WORK_FUNC pfnTriRast;
// conservative rast not supported for points/lines
- pfnTriRast = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN), false,
+ pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (pDC->pState->state.scissorsTileAligned == false));
// overwrite texcoords for point sprites
@@ -1673,7 +1675,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
// setup triangle rasterizer function
PFN_WORK_FUNC pfnTriRast;
// conservative rast not supported for points/lines
- pfnTriRast = GetRasterizerFunc(rastState.sampleCount, (rastState.samplePattern == SWR_MSAA_CENTER_PATTERN), false,
+ pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
SWR_INPUT_COVERAGE_NONE, ALL_EDGES_VALID, (pDC->pState->state.scissorsTileAligned == false));
// make sure this macrotile intersects the triangle
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
index 96b12ae4196..f4aa6eb9f90 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
@@ -115,8 +115,7 @@ template <typename NumSamplesT, typename CenterPatternT, typename ConservativeT,
struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, std::integral_constant<uint32_t, EdgeEnableT::value>>
{
- typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value),
- (CenterPatternT::value ? SWR_MSAA_CENTER_PATTERN : SWR_MSAA_STANDARD_PATTERN)> MT;
+ typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value), CenterPatternT::value> MT;
/// Fixed point precision the rasterizer is using
typedef FixedPointTraits<Fixed_16_8> PrecisionT;
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 623e70a1519..eec68cd468b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -29,6 +29,8 @@
#include "common/formats.h"
#include "common/simdintrin.h"
+#include <functional>
+#include <algorithm>
//////////////////////////////////////////////////////////////////////////
/// PRIMITIVE_TOPOLOGY.
@@ -333,8 +335,7 @@ struct SWR_PS_CONTEXT
uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
- uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS];
- // IN: Pointers to render target hottiles
+ uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
};
//////////////////////////////////////////////////////////////////////////
@@ -909,13 +910,6 @@ enum SWR_FRONTWINDING
};
-enum SWR_MSAA_SAMPLE_PATTERN
-{
- SWR_MSAA_CENTER_PATTERN,
- SWR_MSAA_STANDARD_PATTERN,
- SWR_MSAA_SAMPLE_PATTERN_COUNT
-};
-
enum SWR_PIXEL_LOCATION
{
SWR_PIXEL_LOCATION_CENTER,
@@ -925,16 +919,75 @@ enum SWR_PIXEL_LOCATION
// fixed point screen space sample locations within a pixel
struct SWR_MULTISAMPLE_POS
{
- uint32_t x;
- uint32_t y;
-};
+public:
+ INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
+ INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
+ INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
+ INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
+ INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
+ INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
+ INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
+ INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
+ typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
+ INLINE sampleArrayT X() const { return _x; }; // @llvm_func
+ INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
+ INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
+ INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
+ INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
+ INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
+ INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
+ INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
+
+ INLINE void PrecalcSampleData(int numSamples) // @llvm_func_start
+ {
+ for(int i = 0; i < numSamples; i++)
+ {
+ _vXi[i] = _mm_set1_epi32(_xi[i]);
+ _vYi[i] = _mm_set1_epi32(_yi[i]);
+ _vX[i] = _simd_set1_ps(_x[i]);
+ _vY[i] = _simd_set1_ps(_y[i]);
+ }
+ // precalculate the raster tile BB for the rasterizer.
+ CalcTileSampleOffsets(numSamples);
+ } // @llvm_func_end
+
+
+private:
+ INLINE void CalcTileSampleOffsets(int numSamples) // @llvm_func_start
+ {
+ auto expandThenBlend4 = [](uint32_t* min, uint32_t* max, auto mask)
+ {
+ __m128i vMin = _mm_set1_epi32(*min);
+ __m128i vMax = _mm_set1_epi32(*max);
+ return _simd_blend4_epi32<decltype(mask)::value>(vMin, vMax);
+ };
+
+ auto minXi = std::min_element(std::begin(_xi), &_xi[numSamples]);
+ auto maxXi = std::max_element(std::begin(_xi), &_xi[numSamples]);
+ std::integral_constant<int, 0xA> xMask;
+ // BR(max), BL(min), UR(max), UL(min)
+ tileSampleOffsetsX = expandThenBlend4(minXi, maxXi, xMask);
+
+ auto minYi = std::min_element(std::begin(_yi), &_yi[numSamples]);
+ auto maxYi = std::max_element(std::begin(_yi), &_yi[numSamples]);
+ std::integral_constant<int, 0xC> yMask;
+ // BR(max), BL(min), UR(max), UL(min)
+ tileSampleOffsetsY = expandThenBlend4(minYi, maxYi, yMask);
+ }; // @llvm_func_end
+ // scalar sample values
+ uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
+ uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
+ float _x[SWR_MAX_NUM_MULTISAMPLES];
+ float _y[SWR_MAX_NUM_MULTISAMPLES];
+
+ // precalc'd / vectorized samples
+ __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
+ __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
+ simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
+ simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
+ __m128i tileSampleOffsetsX;
+ __m128i tileSampleOffsetsY;
-enum SWR_MSAA_RASTMODE
-{
- SWR_MSAA_RASTMODE_OFF_PIXEL,
- SWR_MSAA_RASTMODE_OFF_PATTERN,
- SWR_MSAA_RASTMODE_ON_PIXEL,
- SWR_MSAA_RASTMODE_ON_PATTERN
};
//////////////////////////////////////////////////////////////////////////
@@ -951,7 +1004,6 @@ struct SWR_RASTSTATE
uint32_t pointParam : 1;
uint32_t pointSpriteEnable : 1;
uint32_t pointSpriteTopOrigin : 1;
- uint32_t msaaRastEnable : 1;
uint32_t forcedSampleCount : 1;
uint32_t pixelOffset : 1;
uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
@@ -965,15 +1017,11 @@ struct SWR_RASTSTATE
float depthBiasClamp;
SWR_FORMAT depthFormat; // @llvm_enum
- ///@todo: MSAA lines
- // multisample state for MSAA lines
- SWR_MSAA_RASTMODE rastMode; // @llvm_enum
-
// sample count the rasterizer is running at
SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
uint32_t pixelLocation; // UL or Center
- SWR_MULTISAMPLE_POS iSamplePos[SWR_MAX_NUM_MULTISAMPLES];
- SWR_MSAA_SAMPLE_PATTERN samplePattern; // @llvm_enum
+ SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
+ bool bIsCenterPattern; // @llvm_enum
// user clip/cull distance enables
uint8_t cullDistanceMask;