aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp1
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/clip.cpp12
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/clip.h17
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h2
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp62
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.h24
6 files changed, 91 insertions, 27 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index d6aa80d678f..15485012a08 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -676,7 +676,6 @@ void SwrSetViewports(
if (pMatrices != nullptr)
{
- //memcpy(&pState->vpMatrix[0], pMatrices, sizeof(SWR_VIEWPORT_MATRIX) * numViewports);
// @todo Faster to copy portions of the SOA or just copy all of it?
memcpy(&pState->vpMatrices, pMatrices, sizeof(SWR_VIEWPORT_MATRICES));
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
index e624fd8f674..21cbb0a0629 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
@@ -179,26 +179,26 @@ void Clip(const float *pTriangle, const float *pAttribs, int numAttribs, float *
return;
}
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
RDTSC_START(FEClipTriangles);
Clipper<3> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId);
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
RDTSC_STOP(FEClipTriangles, 1, 0);
}
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
RDTSC_START(FEClipLines);
Clipper<2> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId);
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
RDTSC_STOP(FEClipLines, 1, 0);
}
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId)
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
RDTSC_START(FEClipPoints);
Clipper<1> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId);
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
RDTSC_STOP(FEClipPoints, 1, 0);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index a2ba76967fe..b173ae59b45 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -302,7 +302,7 @@ public:
}
// clip SIMD primitives
- void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId)
+ void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId, const simdscalari& vViewportIdx)
{
// input/output vertex store for clipper
simdvertex vertices[7]; // maximum 7 verts generated per triangle
@@ -402,6 +402,7 @@ public:
uint32_t* pVertexCount = (uint32_t*)&vNumClippedVerts;
uint32_t* pPrimitiveId = (uint32_t*)&vPrimId;
+ uint32_t* pViewportIdx = (uint32_t*)&vViewportIdx;
const simdscalari vOffsets = _mm256_set_epi32(
0 * sizeof(simdvertex), // unused lane
@@ -487,7 +488,7 @@ public:
if (assemble)
{
static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };
- pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]));
+ pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]), _simd_set1_epi32(pViewportIdx[inputPrim]));
}
} while (clipPa.NextPrim());
}
@@ -499,7 +500,7 @@ public:
}
// execute the clipper stage
- void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId)
+ void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
// set up binner based on PA state
PFN_PROCESS_PRIMS pfnBinner;
@@ -552,7 +553,7 @@ public:
RDTSC_START(FEGuardbandClip);
// we have to clip tris, execute the clipper, which will also
// call the binner
- ClipSimd(vMask(primMask), vMask(clipMask), pa, primId);
+ ClipSimd(vMask(primMask), vMask(clipMask), pa, primId, viewportIdx);
RDTSC_STOP(FEGuardbandClip, 1, 0);
}
else if (validMask)
@@ -562,7 +563,7 @@ public:
UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
- pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
+ pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
}
}
@@ -948,6 +949,6 @@ private:
// pipeline stage functions
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx);
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 144fcefb208..320aa924c4f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -215,7 +215,7 @@ struct PA_STATE;
// function signature for pipeline stages that execute after primitive assembly
typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
- uint32_t primMask, simdscalari primID);
+ uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
OSALIGNLINE(struct) API_STATE
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 3014c7defc8..a62aa966c01 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -833,7 +833,26 @@ static void GeometryShaderStage(
vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
}
- pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId);
+ // use viewport array index if GS declares it as an output attribute. Otherwise use index 0.
+ simdscalari vViewPortIdx;
+ if (state.gsState.emitsViewportArrayIndex)
+ {
+ simdvector vpiAttrib[3];
+ gsPa.Assemble(VERTEX_VIEWPORT_ARRAY_INDEX_SLOT, vpiAttrib);
+
+ // OOB indices => forced to zero.
+ simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalar vClearMask = _simd_cmplt_ps(vpiAttrib[0].x, _simd_castsi_ps(vNumViewports));
+ vpiAttrib[0].x = _simd_and_ps(vClearMask, vpiAttrib[0].x);
+
+ vViewPortIdx = _simd_castps_si(vpiAttrib[0].x);
+ }
+ else
+ {
+ vViewPortIdx = _simd_set1_epi32(0);
+ }
+
+ pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx);
}
}
} while (gsPa.NextPrim());
@@ -1104,7 +1123,7 @@ static void TessellationStages(
SWR_ASSERT(pfnClipFunc);
pfnClipFunc(pDC, tessPa, workerId, prim,
- GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID));
+ GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), _simd_set1_epi32(0));
}
}
@@ -1359,7 +1378,7 @@ void ProcessDraw(
{
SWR_ASSERT(pDC->pState->pfnProcessPrims);
pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
- GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID));
+ GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), _simd_set1_epi32(0));
}
}
}
@@ -1727,6 +1746,7 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c
/// @param workerId - thread's worker id. Even thread has a unique id.
/// @param tri - Contains triangle position data for SIMDs worth of triangles.
/// @param primID - Primitive ID for each triangle.
+/// @param viewportIdx - viewport array index for each triangle.
/// @tparam CT - ConservativeRastFETraits
template <typename CT>
void BinTriangles(
@@ -1735,7 +1755,8 @@ void BinTriangles(
uint32_t workerId,
simdvector tri[3],
uint32_t triMask,
- simdscalari primID)
+ simdscalari primID,
+ simdscalari viewportIdx)
{
RDTSC_START(FEBinTriangles);
@@ -1770,7 +1791,14 @@ void BinTriangles(
tri[2].v[2] = _simd_mul_ps(tri[2].v[2], vRecipW2);
// viewport transform to screen coords
- viewportTransform<3>(tri, state.vpMatrices);
+ if (state.gsState.emitsViewportArrayIndex)
+ {
+ viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
+ }
+ else
+ {
+ viewportTransform<3>(tri, state.vpMatrices);
+ }
}
// adjust for pixel center location
@@ -2119,7 +2147,8 @@ void BinPoints(
uint32_t workerId,
simdvector prim[3],
uint32_t primMask,
- simdscalari primID)
+ simdscalari primID,
+ simdscalari viewportIdx)
{
RDTSC_START(FEBinPoints);
@@ -2143,7 +2172,14 @@ void BinPoints(
primVerts.z = _simd_mul_ps(primVerts.z, vRecipW0);
// viewport transform to screen coords
- viewportTransform<1>(&primVerts, state.vpMatrices);
+ if (state.gsState.emitsViewportArrayIndex)
+ {
+ viewportTransform<1>(&primVerts, state.vpMatrices, viewportIdx);
+ }
+ else
+ {
+ viewportTransform<1>(&primVerts, state.vpMatrices);
+ }
}
// adjust for pixel center location
@@ -2429,7 +2465,8 @@ void BinLines(
uint32_t workerId,
simdvector prim[],
uint32_t primMask,
- simdscalari primID)
+ simdscalari primID,
+ simdscalari viewportIdx)
{
RDTSC_START(FEBinLines);
@@ -2461,7 +2498,14 @@ void BinLines(
prim[1].v[2] = _simd_mul_ps(prim[1].v[2], vRecipW1);
// viewport transform to screen coords
- viewportTransform<2>(prim, state.vpMatrices);
+ if (state.gsState.emitsViewportArrayIndex)
+ {
+ viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
+ }
+ else
+ {
+ viewportTransform<2>(prim, state.vpMatrices);
+ }
}
// adjust for pixel center location
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index d47f17f4235..5e7762af2d5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -219,6 +219,26 @@ void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices)
}
}
+template<uint32_t NumVerts>
+INLINE
+void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simdscalari vViewportIdx)
+{
+ // perform a gather of each matrix element based on the viewport array indexes
+ simdscalar m00 = _simd_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 1);
+ simdscalar m30 = _simd_i32gather_ps(&vpMatrices.m30[0], vViewportIdx, 1);
+ simdscalar m11 = _simd_i32gather_ps(&vpMatrices.m11[0], vViewportIdx, 1);
+ simdscalar m31 = _simd_i32gather_ps(&vpMatrices.m31[0], vViewportIdx, 1);
+ simdscalar m22 = _simd_i32gather_ps(&vpMatrices.m22[0], vViewportIdx, 1);
+ simdscalar m32 = _simd_i32gather_ps(&vpMatrices.m32[0], vViewportIdx, 1);
+
+ for (uint32_t i = 0; i < NumVerts; ++i)
+ {
+ v[i].x = _simd_fmadd_ps(v[i].x, m00, m30);
+ v[i].y = _simd_fmadd_ps(v[i].y, m11, m31);
+ v[i].z = _simd_fmadd_ps(v[i].z, m22, m32);
+ }
+}
+
INLINE
void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
{
@@ -288,6 +308,6 @@ void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, vo
PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
struct PA_STATE_BASE; // forward decl
-void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
-void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
+void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
+void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);