summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Rowley <[email protected]>2016-08-18 10:56:15 -0500
committerTim Rowley <[email protected]>2016-08-29 12:41:16 -0500
commitb473bec87878fd52eef8ba1ffbc9cf11dc00dc0f (patch)
tree388fdc999488f0f7840c3ba215106c7ecfe392fa
parent63ed11cde9987e438bf28ef74879e2700971eb26 (diff)
swr: [rasterizer core] per-primitive viewports/scissors
- use per-primitive viewports throughout the pipeline. - track whether all available scissor rects are tile aligned. Causes failures, so not taken into account when choosing rasterizer yet. Signed-off-by: Tim Rowley <[email protected]>
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp64
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.cpp28
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.h7
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h4
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/depthstencil.h6
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp144
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp32
7 files changed, 214 insertions, 71 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index d53a6cbedda..5369c21250a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -727,34 +727,52 @@ void SwrSetScissorRects(
void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
{
API_STATE *pState = &pDC->pState->state;
+ uint32_t numScissors = pState->gsState.emitsViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1; // only entry 0 is set up unless the GS emits a viewport array index
+ pState->scissorsTileAligned = true; // cleared by the loop below as soon as one rect fails the alignment test
- // Set up scissor dimensions based on scissor or viewport
- if (pState->rastState.scissorEnable)
+ for (uint32_t index = 0; index < numScissors; ++index)
{
- pState->scissorInFixedPoint = pState->scissorRects[0];
- }
- else
- {
- // the vp width and height must be added to origin un-rounded then the result round to -inf.
- // The cast to int works for rounding assuming all [left, right, top, bottom] are positive.
- pState->scissorInFixedPoint.xmin = (int32_t)pState->vp[0].x;
- pState->scissorInFixedPoint.xmax = (int32_t)(pState->vp[0].x + pState->vp[0].width);
- pState->scissorInFixedPoint.ymin = (int32_t)pState->vp[0].y;
- pState->scissorInFixedPoint.ymax = (int32_t)(pState->vp[0].y + pState->vp[0].height);
- }
+ SWR_RECT &scissorInFixedPoint = pState->scissorsInFixedPoint[index];
- // Clamp to max rect
- pState->scissorInFixedPoint &= g_MaxScissorRect;
+ // Set up scissor dimensions based on scissor or viewport
+ if (pState->rastState.scissorEnable)
+ {
+ scissorInFixedPoint = pState->scissorRects[index];
+ }
+ else
+ {
+ // the vp width and height must be added to origin un-rounded then the result round to -inf.
+ // The cast to int works for rounding assuming all [left, right, top, bottom] are positive.
+ scissorInFixedPoint.xmin = (int32_t)pState->vp[index].x;
+ scissorInFixedPoint.xmax = (int32_t)(pState->vp[index].x + pState->vp[index].width);
+ scissorInFixedPoint.ymin = (int32_t)pState->vp[index].y;
+ scissorInFixedPoint.ymax = (int32_t)(pState->vp[index].y + pState->vp[index].height);
+ }
+
+ // Clamp to max rect
+ scissorInFixedPoint &= g_MaxScissorRect;
+
+ // Test for tile alignment (done before the fixed-point scale below): each edge must be a multiple of the tile dimension on its own axis.
+ bool tileAligned;
+ tileAligned = (scissorInFixedPoint.xmin % KNOB_TILE_X_DIM) == 0;
+ tileAligned &= (scissorInFixedPoint.ymin % KNOB_TILE_Y_DIM) == 0;
+ tileAligned &= (scissorInFixedPoint.xmax % KNOB_TILE_X_DIM) == 0;
+ tileAligned &= (scissorInFixedPoint.ymax % KNOB_TILE_Y_DIM) == 0;
+
+ pState->scissorsTileAligned &= tileAligned;
- // Scale to fixed point
- pState->scissorInFixedPoint.xmin *= FIXED_POINT_SCALE;
- pState->scissorInFixedPoint.xmax *= FIXED_POINT_SCALE;
- pState->scissorInFixedPoint.ymin *= FIXED_POINT_SCALE;
- pState->scissorInFixedPoint.ymax *= FIXED_POINT_SCALE;
+ // Scale to fixed point
+ scissorInFixedPoint.xmin *= FIXED_POINT_SCALE;
+ scissorInFixedPoint.xmax *= FIXED_POINT_SCALE;
+ scissorInFixedPoint.ymin *= FIXED_POINT_SCALE;
+ scissorInFixedPoint.ymax *= FIXED_POINT_SCALE;
- // Make scissor inclusive
- pState->scissorInFixedPoint.xmax -= 1;
- pState->scissorInFixedPoint.ymax -= 1;
+ // Make scissor inclusive
+ scissorInFixedPoint.xmax -= 1;
+ scissorInFixedPoint.ymax -= 1;
+ }
+
+
}
// templated backend function tables
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 1e4dca2fe25..7dd6c0db3de 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -493,14 +493,14 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
if(T::bCanEarlyZ)
{
RDTSC_START(BEEarlyDepthTest);
- depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
- psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
+ psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if(pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask))
{
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthBase, depthPassMask, vCoverageMask, pStencilBase, stencilPassMask);
if (!_simd_movemask_ps(depthPassMask))
@@ -525,14 +525,14 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
if(!T::bCanEarlyZ)
{
RDTSC_START(BELateDepthTest);
- depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
RDTSC_STOP(BELateDepthTest, 0, 0);
if(!_simd_movemask_ps(depthPassMask))
{
// need to call depth/stencil write for stencil write
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthBase, depthPassMask, vCoverageMask, pStencilBase, stencilPassMask);
goto Endtile;
}
@@ -549,7 +549,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
// do final depth write after all pixel kills
if (!pPSState->forceEarlyZ)
{
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthBase, depthPassMask, vCoverageMask, pStencilBase, stencilPassMask);
}
RDTSC_STOP(BEOutputMerger, 0, 0);
@@ -712,14 +712,14 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
if (T::bCanEarlyZ)
{
RDTSC_START(BEEarlyDepthTest);
- depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
// early-exit if no samples passed depth or earlyZ is forced on.
if (pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask))
{
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
if (!_simd_movemask_ps(depthPassMask))
@@ -745,14 +745,14 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
if (!T::bCanEarlyZ)
{
RDTSC_START(BELateDepthTest);
- depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
RDTSC_STOP(BELateDepthTest, 0, 0);
if (!_simd_movemask_ps(depthPassMask))
{
// need to call depth/stencil write for stencil write
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
@@ -771,7 +771,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
// do final depth write after all pixel kills
if (!pPSState->forceEarlyZ)
{
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
}
RDTSC_STOP(BEOutputMerger, 0, 0);
@@ -984,7 +984,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
uint8_t * pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
}
RDTSC_STOP(BEOutputMerger, 0, 0);
@@ -1093,9 +1093,9 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
uint8_t *pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
RDTSC_START(BEEarlyDepthTest);
- simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
+ simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index 27851a1156c..fde5a3f8d9f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -491,14 +491,15 @@ struct PixelRateZTestLoop
RDTSC_START(BEDepthBucket);
depthPassMask[sample] = vCoverageMask[sample];
stencilPassMask[sample] = vCoverageMask[sample];
- depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, vZ[sample], pDepthSample,
- vCoverageMask[sample], pStencilSample, &stencilPassMask[sample]);
+ depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
+ vZ[sample], pDepthSample, vCoverageMask[sample],
+ pStencilSample, &stencilPassMask[sample]);
RDTSC_STOP(BEDepthBucket, 0, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
{
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, vZ[sample],
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, vZ[sample],
pDepthSample, depthPassMask[sample], vCoverageMask[sample], pStencilSample, stencilPassMask[sample]);
if(!_simd_movemask_ps(depthPassMask[sample]))
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 81820530024..c311cb8cab0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -63,6 +63,7 @@ struct TRI_FLAGS
float pointSize;
uint32_t primID;
uint32_t renderTargetArrayIndex;
+ uint32_t viewportIndex;
};
//////////////////////////////////////////////////////////////////////////
@@ -274,7 +275,8 @@ OSALIGNLINE(struct) API_STATE
SWR_VIEWPORT_MATRICES vpMatrices;
SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
- SWR_RECT scissorInFixedPoint;
+ SWR_RECT scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
+ bool scissorsTileAligned;
// Backend state
SWR_BACKEND_STATE backendState;
diff --git a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
index 7b55580bf0a..590c569030a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
+++ b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
@@ -117,14 +117,14 @@ simdscalar QuantizeDepth(simdscalar depth)
INLINE
simdscalar DepthStencilTest(const API_STATE* pState,
- bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask, uint8_t *pStencilBase,
- simdscalar* pStencilMask)
+ bool frontFacing, uint32_t viewportIndex, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask,
+ uint8_t *pStencilBase, simdscalar* pStencilMask)
{
static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
- const SWR_VIEWPORT* pViewport = &pState->vp[0];
+ const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];
simdscalar depthResult = _simd_set1_ps(-1.0f);
simdscalar zbuf;
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 04c62adbc5a..a49ec7a9fbb 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -465,6 +465,70 @@ static INLINE simdscalari GenerateMask(uint32_t numItemsRemaining)
return _simd_castps_si(vMask(mask));
}
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Gather scissor rect data based on per-prim viewport indices.
+/// @param pScissorsInFixedPoint - array of scissor rects in 16.8 fixed point.
+/// @param pViewportIndex - array of per-primitive viewport indexes.
+/// @param scisXmin - output vector of per-primitive scissor rect Xmin data.
+/// @param scisYmin - output vector of per-primitive scissor rect Ymin data.
+/// @param scisXmax - output vector of per-primitive scissor rect Xmax data.
+/// @param scisYmax - output vector of per-primitive scissor rect Ymax data.
+//
+/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
+template<size_t SimdWidth>
+struct GatherScissors
+{
+ static void Gather(const SWR_RECT* pScissorsInFixedPoint, const uint32_t* pViewportIndex,
+ simdscalari &scisXmin, simdscalari &scisYmin,
+ simdscalari &scisXmax, simdscalari &scisYmax)
+ {
+ SWR_ASSERT(0, "Unhandled Simd Width in Scissor Rect Gather"); // primary template: only reached for a SimdWidth with no specialization; outputs are left unwritten
+ }
+};
+
+template<>
+struct GatherScissors<8> // specialization for SimdWidth == 8; reads pViewportIndex[0..7]
+{
+ static void Gather(const SWR_RECT* pScissorsInFixedPoint, const uint32_t* pViewportIndex,
+ simdscalari &scisXmin, simdscalari &scisYmin,
+ simdscalari &scisXmax, simdscalari &scisYmax)
+ { // NOTE(review): argument i of each _simd_set_epi32 call is the rect selected by pViewportIndex[i]; if _simd_set_epi32 follows _mm256_set_epi32 (first argument = highest lane) the lanes come out reversed -- confirm lane order.
+ scisXmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[0]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[1]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[2]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[3]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[4]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[5]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[6]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[7]].xmin);
+ scisYmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[0]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[1]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[2]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[3]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[4]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[5]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[6]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[7]].ymin);
+ scisXmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[0]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[1]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[2]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[3]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[4]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[5]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[6]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[7]].xmax);
+ scisYmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[0]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[1]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[2]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[3]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[4]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[5]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[6]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[7]].ymax);
+ }
+};
+
//////////////////////////////////////////////////////////////////////////
/// @brief StreamOut - Streams vertex data out to SO buffers.
/// Generally, we are only streaming out a SIMDs worth of triangles.
@@ -1849,6 +1913,7 @@ void BinTriangles(
// compute per tri backface
uint32_t frontFaceMask = frontWindingTris;
uint32_t *pPrimID = (uint32_t *)&primID;
+ const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
DWORD triIndex = 0;
// for center sample pattern, all samples are at pixel center; calculate coverage
// once at center and broadcast the results in the backend
@@ -1944,10 +2009,26 @@ void BinTriangles(
}
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
- bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
- bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
- bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
- bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
+ // Gather the AOS effective scissor rects based on the per-prim VP index.
+ /// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
+ simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
+ if (state.gsState.emitsViewportArrayIndex)
+ {
+ GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
+ scisXmin, scisYmin, scisXmax, scisYmax);
+ }
+ else // broadcast fast path for non-VPAI case.
+ {
+ scisXmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmin);
+ scisYmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymin);
+ scisXmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmax);
+ scisYmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymax);
+ }
+
+ bbox.xmin = _simd_max_epi32(bbox.xmin, scisXmin);
+ bbox.ymin = _simd_max_epi32(bbox.ymin, scisYmin);
+ bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), scisXmax);
+ bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), scisYmax);
if(CT::IsConservativeT::value)
{
@@ -2044,7 +2125,8 @@ void BinTriangles(
desc.triFlags.frontFacing = state.forceFront ? 1 : ((frontFaceMask >> triIndex) & 1);
desc.triFlags.primID = pPrimID[triIndex];
desc.triFlags.renderTargetArrayIndex = aRTAI[triIndex];
-
+ desc.triFlags.viewportIndex = pViewportIndex[triIndex];
+
auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
@@ -2130,6 +2212,7 @@ void BinPoints(
const SWR_FRONTEND_STATE& feState = state.frontendState;
const SWR_GS_STATE& gsState = state.gsState;
const SWR_RASTSTATE& rastState = state.rastState;
+ const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
// Select attribute processor
PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(1,
@@ -2240,6 +2323,7 @@ void BinPoints(
desc.triFlags.frontFacing = 1;
desc.triFlags.primID = pPrimID[primIndex];
desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
+ desc.triFlags.viewportIndex = pViewportIndex[primIndex];
work.pfnWork = RasterizeSimplePoint;
@@ -2306,10 +2390,26 @@ void BinPoints(
bbox.ymax = _simd_add_epi32(bbox.ymax, vHalfWidthi);
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
- bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
- bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
- bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
- bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
+ // Gather the AOS effective scissor rects based on the per-prim VP index.
+ /// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
+ simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
+ if (state.gsState.emitsViewportArrayIndex)
+ {
+ GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
+ scisXmin, scisYmin, scisXmax, scisYmax);
+ }
+ else // broadcast fast path for non-VPAI case.
+ {
+ scisXmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmin);
+ scisYmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymin);
+ scisXmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmax);
+ scisYmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymax);
+ }
+
+ bbox.xmin = _simd_max_epi32(bbox.xmin, scisXmin);
+ bbox.ymin = _simd_max_epi32(bbox.ymin, scisYmin);
+ bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), scisXmax);
+ bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), scisYmax);
// Cull bloated points completely outside scissor
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
@@ -2374,6 +2474,7 @@ void BinPoints(
desc.triFlags.primID = pPrimID[primIndex];
desc.triFlags.pointSize = aPointSize[primIndex];
desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
+ desc.triFlags.viewportIndex = pViewportIndex[primIndex];
work.pfnWork = RasterizeTriPoint;
@@ -2431,6 +2532,7 @@ void BinPoints(
/// @param workerId - thread's worker id. Every thread has a unique id.
/// @param tri - Contains line position data for SIMDs worth of points.
/// @param primID - Primitive ID for each line.
+/// @param viewportIdx - Viewport Array Index for each line.
void BinLines(
DRAW_CONTEXT *pDC,
PA_STATE& pa,
@@ -2508,6 +2610,7 @@ void BinLines(
primMask &= ~_simd_movemask_ps(_simd_castsi_ps(vZeroLengthMask));
uint32_t *pPrimID = (uint32_t *)&primID;
+ const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
simdscalar vUnused = _simd_setzero_ps();
@@ -2533,10 +2636,24 @@ void BinLines(
bbox.ymax = _simd_blendv_epi32(bloatBox.ymax, bbox.ymax, vYmajorMask);
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
- bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
- bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
- bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
- bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
+ simdscalari scisXmin, scisYmin, scisXmax, scisYmax;
+ if (state.gsState.emitsViewportArrayIndex)
+ {
+ GatherScissors<KNOB_SIMD_WIDTH>::Gather(&state.scissorsInFixedPoint[0], pViewportIndex,
+ scisXmin, scisYmin, scisXmax, scisYmax);
+ }
+ else // broadcast fast path for non-VPAI case.
+ {
+ scisXmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmin);
+ scisYmin = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymin);
+ scisXmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].xmax);
+ scisYmax = _simd_set1_epi32(state.scissorsInFixedPoint[0].ymax);
+ }
+
+ bbox.xmin = _simd_max_epi32(bbox.xmin, scisXmin);
+ bbox.ymin = _simd_max_epi32(bbox.ymin, scisYmin);
+ bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), scisXmax);
+ bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), scisYmax);
// Cull prims completely outside scissor
{
@@ -2602,6 +2719,7 @@ void BinLines(
desc.triFlags.primID = pPrimID[primIndex];
desc.triFlags.yMajor = (yMajorMask >> primIndex) & 1;
desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
+ desc.triFlags.viewportIndex = pViewportIndex[primIndex];
work.pfnWork = RasterizeLine;
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
index 9a8d062818d..66283e340d6 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
@@ -967,20 +967,22 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
OSALIGNSIMD(SWR_RECT) bbox;
calcBoundingBoxInt(vXi, vYi, bbox);
+ const SWR_RECT &scissorInFixedPoint = state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
+
if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
{
// If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid
bbox.xmin--; bbox.xmax++; bbox.ymin--; bbox.ymax++;
- SWR_ASSERT(state.scissorInFixedPoint.xmin >= 0 && state.scissorInFixedPoint.ymin >= 0,
+ SWR_ASSERT(scissorInFixedPoint.xmin >= 0 && scissorInFixedPoint.ymin >= 0,
"Conservative rast degenerate handling requires a valid scissor rect");
}
// Intersect with scissor/viewport
OSALIGNSIMD(SWR_RECT) intersect;
- intersect.xmin = std::max(bbox.xmin, state.scissorInFixedPoint.xmin);
- intersect.xmax = std::min(bbox.xmax - 1, state.scissorInFixedPoint.xmax);
- intersect.ymin = std::max(bbox.ymin, state.scissorInFixedPoint.ymin);
- intersect.ymax = std::min(bbox.ymax - 1, state.scissorInFixedPoint.ymax);
+ intersect.xmin = std::max(bbox.xmin, scissorInFixedPoint.xmin);
+ intersect.xmax = std::min(bbox.xmax - 1, scissorInFixedPoint.xmax);
+ intersect.ymin = std::max(bbox.ymin, scissorInFixedPoint.ymin);
+ intersect.ymax = std::min(bbox.ymax - 1, scissorInFixedPoint.ymax);
triDesc.triFlags = workDesc.triFlags;
@@ -1087,7 +1089,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
// Compute and store triangle edge data if scissor needs to rasterized
ComputeScissorEdges<typename RT::RasterizeScissorEdgesT, typename RT::IsConservativeT, RT>
- (bbox, state.scissorInFixedPoint, x, y, rastEdges, vEdgeFix16);
+ (bbox, scissorInFixedPoint, x, y, rastEdges, vEdgeFix16);
// Evaluate edge equations at sample positions of each of the 4 corners of a raster tile
// used to for testing if entire raster tile is inside a triangle
@@ -1573,6 +1575,8 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
+ const SWR_RECT &scissorInFixedPoint = state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
+
// create a copy of the triangle buffer to write our adjusted vertices to
OSALIGNSIMD(float) newTriBuffer[4 * 4];
TRIANGLE_WORK_DESC newWorkDesc = workDesc;
@@ -1667,13 +1671,13 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
calcBoundingBoxInt(vXai, vYai, bboxA);
if (!(bboxA.xmin > macroBoxRight ||
- bboxA.xmin > state.scissorInFixedPoint.xmax ||
+ bboxA.xmin > scissorInFixedPoint.xmax ||
bboxA.xmax - 1 < macroBoxLeft ||
- bboxA.xmax - 1 < state.scissorInFixedPoint.xmin ||
+ bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
bboxA.ymin > macroBoxBottom ||
- bboxA.ymin > state.scissorInFixedPoint.ymax ||
+ bboxA.ymin > scissorInFixedPoint.ymax ||
bboxA.ymax - 1 < macroBoxTop ||
- bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) {
+ bboxA.ymax - 1 < scissorInFixedPoint.ymin)) {
// rasterize triangle
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}
@@ -1740,13 +1744,13 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
calcBoundingBoxInt(vXai, vYai, bboxA);
if (!(bboxA.xmin > macroBoxRight ||
- bboxA.xmin > state.scissorInFixedPoint.xmax ||
+ bboxA.xmin > scissorInFixedPoint.xmax ||
bboxA.xmax - 1 < macroBoxLeft ||
- bboxA.xmax - 1 < state.scissorInFixedPoint.xmin ||
+ bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
bboxA.ymin > macroBoxBottom ||
- bboxA.ymin > state.scissorInFixedPoint.ymax ||
+ bboxA.ymin > scissorInFixedPoint.ymax ||
bboxA.ymax - 1 < macroBoxTop ||
- bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) {
+ bboxA.ymax - 1 < scissorInFixedPoint.ymin)) {
// rasterize triangle
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}