summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tim Rowley <[email protected]> 2017-12-11 15:51:46 -0600
committer Tim Rowley <[email protected]> 2017-12-15 10:56:40 -0600
commit 182cc51a50492926ebf72d4cd38f1e574c768e72 (patch)
tree 97dd31aede608e73173969aeb272c7a78f34c102
parent ca59b2e75ccb0de2ef7f72751a52b035d060d1bc (diff)
swr/rast: Pull RTAI gather & offset out of clip/bin code
Reviewed-by: Bruce Cherniak <[email protected]>
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/binner.cpp 118
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.cpp 30
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.h 35
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/context.h 4
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.cpp 153
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.h 8
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/pa.h 1
7 files changed, 203 insertions, 146 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index a664ed812fe..7ef87c4443d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -45,7 +45,8 @@ void BinPostSetupLinesImpl(
typename SIMD_T::Float recipW[],
uint32_t primMask,
typename SIMD_T::Integer const &primID,
- typename SIMD_T::Integer const &viewportIdx);
+ typename SIMD_T::Integer const &viewportIdx,
+ typename SIMD_T::Integer const &rtIdx);
template <typename SIMD_T, uint32_t SIMD_WIDTH>
void BinPostSetupPointsImpl(
@@ -55,7 +56,8 @@ void BinPostSetupPointsImpl(
typename SIMD_T::Vec4 prim[],
uint32_t primMask,
typename SIMD_T::Integer const &primID,
- typename SIMD_T::Integer const &viewportIdx);
+ typename SIMD_T::Integer const &viewportIdx,
+ typename SIMD_T::Integer const &rtIdx);
//////////////////////////////////////////////////////////////////////////
/// @brief Processes attributes for the backend based on linkage mask and
@@ -308,9 +310,11 @@ void SIMDCALL BinTrianglesImpl(
typename SIMD_T::Vec4 tri[3],
uint32_t triMask,
typename SIMD_T::Integer const &primID,
- typename SIMD_T::Integer const &viewportIdx)
+ typename SIMD_T::Integer const &viewportIdx,
+ typename SIMD_T::Integer const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
+ const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
AR_BEGIN(FEBinTriangles, pDC->drawId);
@@ -604,21 +608,21 @@ endBinTriangles:
recipW[0] = vRecipW0;
recipW[1] = vRecipW1;
- BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx);
+ BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
line[0] = tri[1];
line[1] = tri[2];
recipW[0] = vRecipW1;
recipW[1] = vRecipW2;
- BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx);
+ BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
line[0] = tri[2];
line[1] = tri[0];
recipW[0] = vRecipW2;
recipW[1] = vRecipW0;
- BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx);
+ BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
AR_END(FEBinTriangles, 1);
return;
@@ -626,9 +630,9 @@ endBinTriangles:
else if (rastState.fillMode == SWR_FILLMODE_POINT)
{
// Bin 3 points
- BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx);
- BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx);
- BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx);
+ BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx, rtIdx);
+ BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx);
+ BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx);
AR_END(FEBinTriangles, 1);
return;
@@ -659,22 +663,6 @@ endBinTriangles:
TransposeVertices(vHorizZ, tri[0].z, tri[1].z, tri[2].z);
TransposeVertices(vHorizW, vRecipW0, vRecipW1, vRecipW2);
- // store render target array index
- OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH];
- if (state.backendState.readRenderTargetArrayIndex)
- {
- typename SIMD_T::Vec4 vRtai[3];
- pa.Assemble(VERTEX_SGV_SLOT, vRtai);
- typename SIMD_T::Integer vRtaii;
- vRtaii = SIMD_T::castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]);
- SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), vRtaii);
- }
- else
- {
- SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), SIMD_T::setzero_si());
- }
-
-
// scan remaining valid triangles and bin each separately
while (_BitScanForward(&triIndex, triMask))
{
@@ -763,9 +751,10 @@ void BinTriangles(
simdvector tri[3],
uint32_t triMask,
simdscalari const &primID,
- simdscalari const &viewportIdx)
+ simdscalari const &viewportIdx,
+ simdscalari const &rtIdx)
{
- BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx);
+ BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
}
#if USE_SIMD16_FRONTEND
@@ -777,9 +766,10 @@ void SIMDCALL BinTriangles_simd16(
simd16vector tri[3],
uint32_t triMask,
simd16scalari const &primID,
- simd16scalari const &viewportIdx)
+ simd16scalari const &viewportIdx,
+ simd16scalari const &rtIdx)
{
- BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx);
+ BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
}
#endif
@@ -828,7 +818,8 @@ void BinPostSetupPointsImpl(
typename SIMD_T::Vec4 prim[],
uint32_t primMask,
typename SIMD_T::Integer const &primID,
- typename SIMD_T::Integer const &viewportIdx)
+ typename SIMD_T::Integer const &viewportIdx,
+ typename SIMD_T::Integer const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
@@ -896,19 +887,8 @@ void BinPostSetupPointsImpl(
SIMD_T::store_ps(reinterpret_cast<float *>(aZ), primVerts.z);
// store render target array index
- OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH];
- if (state.backendState.readRenderTargetArrayIndex)
- {
- typename SIMD_T::Vec4 vRtai;
- pa.Assemble(VERTEX_SGV_SLOT, &vRtai);
- typename SIMD_T::Integer vRtaii = SIMD_T::castps_si(vRtai[VERTEX_SGV_RTAI_COMP]);
- SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), vRtaii);
- }
- else
- {
- SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), SIMD_T::setzero_si());
- }
-
+ const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
+
uint32_t *pPrimID = (uint32_t *)&primID;
DWORD primIndex = 0;
@@ -1155,7 +1135,8 @@ void BinPointsImpl(
typename SIMD_T::Vec4 prim[3],
uint32_t primMask,
typename SIMD_T::Integer const &primID,
- typename SIMD_T::Integer const &viewportIdx)
+ typename SIMD_T::Integer const &viewportIdx,
+ typename SIMD_T::Integer const &rtIdx)
{
const API_STATE& state = GetApiState(pDC);
const SWR_FRONTEND_STATE& feState = state.frontendState;
@@ -1193,7 +1174,8 @@ void BinPointsImpl(
prim,
primMask,
primID,
- viewportIdx);
+ viewportIdx,
+ rtIdx);
}
void BinPoints(
@@ -1203,7 +1185,8 @@ void BinPoints(
simdvector prim[3],
uint32_t primMask,
simdscalari const &primID,
- simdscalari const &viewportIdx)
+ simdscalari const &viewportIdx,
+ simdscalari const &rtIdx)
{
BinPointsImpl<SIMD256, KNOB_SIMD_WIDTH>(
pDC,
@@ -1212,7 +1195,8 @@ void BinPoints(
prim,
primMask,
primID,
- viewportIdx);
+ viewportIdx,
+ rtIdx);
}
#if USE_SIMD16_FRONTEND
@@ -1223,7 +1207,8 @@ void SIMDCALL BinPoints_simd16(
simd16vector prim[3],
uint32_t primMask,
simd16scalari const &primID,
- simd16scalari const &viewportIdx)
+ simd16scalari const &viewportIdx,
+ simd16scalari const & rtIdx)
{
BinPointsImpl<SIMD512, KNOB_SIMD16_WIDTH>(
pDC,
@@ -1232,7 +1217,8 @@ void SIMDCALL BinPoints_simd16(
prim,
primMask,
primID,
- viewportIdx);
+ viewportIdx,
+ rtIdx);
}
#endif
@@ -1253,9 +1239,11 @@ void BinPostSetupLinesImpl(
typename SIMD_T::Float recipW[],
uint32_t primMask,
typename SIMD_T::Integer const &primID,
- typename SIMD_T::Integer const &viewportIdx)
+ typename SIMD_T::Integer const &viewportIdx,
+ typename SIMD_T::Integer const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
+ const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
AR_BEGIN(FEBinLines, pDC->drawId);
@@ -1376,20 +1364,6 @@ void BinPostSetupLinesImpl(
TransposeVertices(vHorizZ, prim[0].z, prim[1].z, SIMD_T::setzero_ps());
TransposeVertices(vHorizW, vRecipW0, vRecipW1, SIMD_T::setzero_ps());
- // store render target array index
- OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH];
- if (state.backendState.readRenderTargetArrayIndex)
- {
- typename SIMD_T::Vec4 vRtai[2];
- pa.Assemble(VERTEX_SGV_SLOT, vRtai);
- typename SIMD_T::Integer vRtaii = SIMD_T::castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]);
- SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), vRtaii);
- }
- else
- {
- SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), SIMD_T::setzero_si());
- }
-
// scan remaining valid prims and bin each separately
DWORD primIndex;
while (_BitScanForward(&primIndex, primMask))
@@ -1471,7 +1445,8 @@ void SIMDCALL BinLinesImpl(
typename SIMD_T::Vec4 prim[3],
uint32_t primMask,
typename SIMD_T::Integer const &primID,
- typename SIMD_T::Integer const &viewportIdx)
+ typename SIMD_T::Integer const &viewportIdx,
+ typename SIMD_T::Integer const & rtIdx)
{
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
@@ -1522,7 +1497,8 @@ void SIMDCALL BinLinesImpl(
vRecipW,
primMask,
primID,
- viewportIdx);
+ viewportIdx,
+ rtIdx);
}
void BinLines(
@@ -1532,9 +1508,10 @@ void BinLines(
simdvector prim[],
uint32_t primMask,
simdscalari const &primID,
- simdscalari const &viewportIdx)
+ simdscalari const &viewportIdx,
+ simdscalari const &rtIdx)
{
- BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx);
+ BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
}
#if USE_SIMD16_FRONTEND
@@ -1545,9 +1522,10 @@ void SIMDCALL BinLines_simd16(
simd16vector prim[3],
uint32_t primMask,
simd16scalari const &primID,
- simd16scalari const &viewportIdx)
+ simd16scalari const &viewportIdx,
+ simd16scalari const &rtIdx)
{
- BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx);
+ BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
}
#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
index d4da2c3badf..72058029b03 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
@@ -160,35 +160,39 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
return i;
}
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
+ simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipTriangles, pDC->drawId);
Clipper<SIMD256, 3> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
AR_END(FEClipTriangles, 1);
}
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
+ simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipLines, pDC->drawId);
Clipper<SIMD256, 2> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
AR_END(FEClipLines, 1);
}
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
+ simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipPoints, pDC->drawId);
Clipper<SIMD256, 1> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
AR_END(FEClipPoints, 1);
}
#if USE_SIMD16_FRONTEND
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
+void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
+ simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipTriangles, pDC->drawId);
@@ -198,12 +202,13 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor
Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
AR_END(FEClipTriangles, 1);
}
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
+void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
+ simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipLines, pDC->drawId);
@@ -213,12 +218,13 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI
Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
AR_END(FEClipLines, 1);
}
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
+void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
+ simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(FEClipPoints, pDC->drawId);
@@ -228,7 +234,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t worker
Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
AR_END(FEClipPoints, 1);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 8b947668d3f..e5e00d49b02 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -178,11 +178,11 @@ struct BinnerChooser<SIMD256>
};
}
- void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx)
+ void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx, SIMD256::Integer &rtIdx)
{
SWR_ASSERT(pfnBinFunc != nullptr);
- pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx);
+ pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx, rtIdx);
}
};
@@ -231,11 +231,11 @@ struct BinnerChooser<SIMD512>
};
}
- void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx)
+ void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx, SIMD512::Integer &rtIdx)
{
SWR_ASSERT(pfnBinFunc != nullptr);
- pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx);
+ pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx, rtIdx);
}
};
@@ -437,7 +437,8 @@ public:
return SIMD_T::movemask_ps(vClipCullMask);
}
- void ClipSimd(const typename SIMD_T::Vec4 prim[], const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa, const typename SIMD_T::Integer &vPrimId, const typename SIMD_T::Integer &vViewportIdx)
+ void ClipSimd(const typename SIMD_T::Vec4 prim[], const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa,
+ const typename SIMD_T::Integer &vPrimId, const typename SIMD_T::Integer &vViewportIdx, const typename SIMD_T::Integer &vRtIdx)
{
// input/output vertex store for clipper
SIMDVERTEX_T<SIMD_T> vertices[7]; // maximum 7 verts generated per triangle
@@ -538,6 +539,7 @@ public:
const uint32_t *pVertexCount = reinterpret_cast<const uint32_t *>(&vNumClippedVerts);
const uint32_t *pPrimitiveId = reinterpret_cast<const uint32_t *>(&vPrimId);
const uint32_t *pViewportIdx = reinterpret_cast<const uint32_t *>(&vViewportIdx);
+ const uint32_t *pRtIdx = reinterpret_cast<const uint32_t *>(&vRtIdx);
const SIMD256::Integer vOffsets = SIMD256::set_epi32(
0 * sizeof(SIMDVERTEX_T<SIMD_T>), // unused lane
@@ -651,6 +653,8 @@ public:
const typename SIMD_T::Integer primID = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]);
const typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(pViewportIdx[inputPrim]);
+ const typename SIMD_T::Integer rtIdx = SIMD_T::set1_epi32(pRtIdx[inputPrim]);
+
while (clipPA.GetNextStreamOutput())
{
@@ -662,7 +666,7 @@ public:
if (assemble)
{
- binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx);
+ binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx, rtIdx);
}
} while (clipPA.NextPrim());
@@ -677,7 +681,8 @@ public:
UPDATE_STAT_FE(CPrimitives, numClippedPrims);
}
- void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primId, typename SIMD_T::Integer const &viewportIdx)
+ void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask,
+ typename SIMD_T::Integer const &primId, typename SIMD_T::Integer const &viewportIdx, typename SIMD_T::Integer const &rtIdx)
{
SWR_ASSERT(pa.pDC != nullptr);
@@ -716,7 +721,7 @@ public:
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
- ClipSimd(prim, SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx);
+ ClipSimd(prim, SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx, rtIdx);
AR_END(FEGuardbandClip, 1);
}
else if (validMask)
@@ -725,7 +730,7 @@ public:
UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
- binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
+ binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx, rtIdx);
}
}
@@ -1135,12 +1140,12 @@ private:
// pipeline stage functions
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
#if USE_SIMD16_FRONTEND
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
+void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index ef6719384ff..cba8de999be 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -214,12 +214,12 @@ struct PA_STATE;
// function signature for pipeline stages that execute after primitive assembly
typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
- uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
+ uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
#if ENABLE_AVX512_SIMD16
// function signature for pipeline stages that execute after primitive assembly
typedef void(SIMDCALL *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[],
- uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
+ uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
#endif
OSALIGNLINE(struct) API_STATE
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 3de79d600f6..ed8ce151c39 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -989,15 +989,27 @@ static void GeometryShaderStage(
#if USE_SIMD16_FRONTEND
simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]);
- // Gather the VPAI from the SVG if provided.
- SIMD16::Vec4 vpiAttrib[3];
- SIMD16::Integer vViewportIdx = SIMD16::setzero_si();
+ // Gather data from the SGV if provided.
+ simd16scalari vViewportIdx = SIMD16::setzero_si();
+ simd16scalari vRtIdx = SIMD16::setzero_si();
+ SIMD16::Vec4 svgAttrib[4];
+
+ if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ {
+ gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+ }
+
+
if (state.backendState.readViewportArrayIndex)
{
- gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
- vViewportIdx = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
gsPa.viewportArrayActive = true;
}
+ if (state.backendState.readRenderTargetArrayIndex)
+ {
+ vRtIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+ gsPa.rtArrayActive = true;
+ }
{
// OOB VPAI indices => forced to zero.
@@ -1007,29 +1019,40 @@ static void GeometryShaderStage(
vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
gsPa.useAlternateOffset = false;
- pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx);
+ pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx);
}
#else
simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
- // Gather the VPAI from the SVG if provided.
- SIMD8::Vec4 vpiAttrib[3];
- SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
+ // Gather data from the SGV if provided.
+ simdscalari vViewportIdx = SIMD16::setzero_si();
+ simdscalari vRtIdx = SIMD16::setzero_si();
+ SIMD8::Vec4 svgAttrib[4];
+
+ if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ {
+ tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+ }
+
+
if (state.backendState.readViewportArrayIndex)
{
- gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
- vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
// OOB VPAI indices => forced to zero.
vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
-
- gsPa.viewportArrayActive = true;
+ tessPa.viewportArrayActive = true;
+ }
+ if (state.backendState.readRenderTargetArrayIndex)
+ {
+ vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+ tessPa.rtArrayActive = true;
}
- pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx);
+ pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx);
#endif
}
}
@@ -1372,46 +1395,68 @@ static void TessellationStages(
SWR_ASSERT(pfnClipFunc);
#if USE_SIMD16_FRONTEND
- // Gather the VPAI from the SVG if provided.
- simd16scalari vpai = SIMD16::setzero_si();
+ // Gather data from the SGV if provided.
+ simd16scalari vViewportIdx = SIMD16::setzero_si();
+ simd16scalari vRtIdx = SIMD16::setzero_si();
+ SIMD16::Vec4 svgAttrib[4];
+
+ if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ {
+ tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+ }
+
+
if (state.backendState.readViewportArrayIndex)
{
- simd16vector vpiAttrib[4];
- tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
- vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
tessPa.viewportArrayActive = true;
}
+ if (state.backendState.readRenderTargetArrayIndex)
+ {
+ vRtIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+ tessPa.rtArrayActive = true;
+ }
{
// OOB VPAI indices => forced to zero.
- vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si());
+ vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports);
- vpai = SIMD16::and_si(vClearMask, vpai);
+ simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
+ vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
tessPa.useAlternateOffset = false;
- pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vpai);
+ pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vViewportIdx, vRtIdx);
}
#else
- // Gather the VPAI from the SVG if provided.
- SIMD8::Vec4 vpiAttrib[3];
- SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
+ // Gather data from the SGV if provided.
+ simdscalari vViewportIdx = SIMD16::setzero_si();
+ simdscalari vRtIdx = SIMD16::setzero_si();
+ SIMD8::Vec4 svgAttrib[4];
+
+ if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ {
+ tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+ }
+
if (state.backendState.readViewportArrayIndex)
{
- tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
- vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
// OOB VPAI indices => forced to zero.
vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
-
tessPa.viewportArrayActive = true;
}
+ if (state.backendState.readRenderTargetArrayIndex)
+ {
+ vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+ tessPa.rtArrayActive = true;
+ }
pfnClipFunc(pDC, tessPa, workerId, prim,
- GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx);
+ GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx, vRtIdx);
#endif
}
}
@@ -1803,15 +1848,27 @@ void ProcessDraw(
if (HasRastT::value)
{
SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
- // Gather the VPAI from the SVG if provided.
+ // Gather data from the SGV if provided.
simd16scalari vpai = SIMD16::setzero_si();
+ simd16scalari rtai = SIMD16::setzero_si();
+ SIMD16::Vec4 svgAttrib[4];
+
+ if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ {
+ pa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+ }
+
+
if (state.backendState.readViewportArrayIndex)
{
- simd16vector vpiAttrib[4];
- pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
- vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vpai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
pa.viewportArrayActive = true;
}
+ if (state.backendState.readRenderTargetArrayIndex)
+ {
+ rtai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+ pa.rtArrayActive = true;
+ }
{
// OOB VPAI indices => forced to zero.
@@ -1821,7 +1878,7 @@ void ProcessDraw(
vpai = SIMD16::and_si(vClearMask, vpai);
pa.useAlternateOffset = false;
- pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai);
+ pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai, rtai);
}
}
}
@@ -1983,25 +2040,35 @@ void ProcessDraw(
{
SWR_ASSERT(pDC->pState->pfnProcessPrims);
- // Gather the VPAI from the SVG if provided.
- SIMD8::Vec4 vpiAttrib[3];
- SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
+ // Gather data from the SGV if provided.
+ simdscalari vViewportIdx = SIMD16::setzero_si();
+ simdscalari vRtIdx = SIMD16::setzero_si();
+ SIMD8::Vec4 svgAttrib[4];
+
+ if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ {
+ tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+ }
+
if (state.backendState.readViewportArrayIndex)
{
- pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
- vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
// OOB VPAI indices => forced to zero.
vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
-
- pa.viewportArrayActive = true;
+ tessPa.viewportArrayActive = true;
+ }
+ if (state.backendState.readRenderTargetArrayIndex)
+ {
+ vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+ tessPa.rtArrayActive = true;
}
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
- GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx);
+ GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx, vRtIdx);
}
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index e2ca1274c51..6a2ec8474f1 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -389,10 +389,10 @@ PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative);
#endif
struct PA_STATE_BASE; // forward decl
-void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
-void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
+void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
#if USE_SIMD16_FRONTEND
-void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
-void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
+void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
index a0160d4a40c..c88b4bfd97d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -80,6 +80,7 @@ struct PA_STATE
#endif
bool viewportArrayActive{ false };
+ bool rtArrayActive { false };
uint32_t numVertsPerPrim{ 0 };
PA_STATE(){}