aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/swr
diff options
context:
space:
mode:
author: Alok Hota <[email protected]> 2018-05-16 11:14:18 -0500
committer: George Kyriazis <[email protected]> 2018-05-17 10:53:01 -0500
commit: a0dddac1cb88b1d518d9875cec2e8133ec6cddfc (patch)
tree: 20d212097c1117bb4aa10d3f41408ee99981fff5 /src/gallium/drivers/swr
parent: 7926d18fa5df028f73fbbb8f30f81cb7f3c79901 (diff)
swr/rast: Rectlist support for GS
Add rectlist as an option for GS. Needed to support some driver optimizations.

Reviewed-By: George Kyriazis <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.cpp 24
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.h 6
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.cpp 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/pa.h 68
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h 1
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/state.h 2
6 files changed, 102 insertions, 1 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
index 780ca15ce71..e6c22180683 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
@@ -160,6 +160,15 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
return i;
}
+void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
+ simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+{ // Clip stage for rect-list primitives: builds a SIMD256 Clipper and forwards every argument to ExecuteStage. GeometryShaderStage installs it as pfnClipFunc when outputTopology is TOP_RECT_LIST.
+ RDTSC_BEGIN(FEClipRectangles, pDC->drawId); // begin the FEClipRectangles bucket (a CORE_BUCKETS entry) for this draw id
+ Clipper<SIMD256, 3> clipper(workerId, pDC); // 3 = vertices per primitive (named VERTS_PER_PRIM in ClipRectangles_simd16); only three are supplied per rect, the fourth is implied
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
+ RDTSC_END(FEClipRectangles, 1); // end the bucket begun above
+}
+
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
{
@@ -188,6 +197,21 @@ void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector p
}
#if USE_SIMD16_FRONTEND
+void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
+ simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+{ // SIMD16 variant of ClipRectangles (simd16 argument types, SIMD512 Clipper); GeometryShaderStage installs it as pfnClipFunc when outputTopology is TOP_RECT_LIST.
+ RDTSC_BEGIN(FEClipRectangles, pDC->drawId); // begin the FEClipRectangles bucket (a CORE_BUCKETS entry) for this draw id
+
+ enum { VERTS_PER_PRIM = 3 }; // only three vertices are supplied per rect; the fourth is implied and computed during primitive assembly
+
+ Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
+
+ pa.useAlternateOffset = false; // NOTE(review): reset before clipping; what this flag selects is not visible in this diff, confirm against PA_STATE
+ clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
+
+ RDTSC_END(FEClipRectangles, 1); // end the bucket begun above
+}
+
void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 0f8399c742c..90ae4263575 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -531,6 +531,10 @@ public:
{
clipTopology = TOP_POINT_LIST;
}
+ else if (pa.binTopology == TOP_RECT_LIST)
+ {
+ clipTopology = TOP_RECT_LIST;
+ }
}
else if (NumVertsPerPrim == 2)
{
@@ -1149,10 +1153,12 @@ private:
// pipeline stage functions
+void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
#if USE_SIMD16_FRONTEND
+void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 1847c3e822d..47c0662e5ee 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -868,6 +868,7 @@ static void GeometryShaderStage(
{
switch (pState->outputTopology)
{
+ case TOP_RECT_LIST: pfnClipFunc = ClipRectangles_simd16; break;
case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles_simd16; break;
case TOP_LINE_STRIP: pfnClipFunc = ClipLines_simd16; break;
case TOP_POINT_LIST: pfnClipFunc = ClipPoints_simd16; break;
@@ -881,6 +882,7 @@ static void GeometryShaderStage(
{
switch (pState->outputTopology)
{
+ case TOP_RECT_LIST: pfnClipFunc = ClipRectangles; break;
case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles; break;
case TOP_LINE_STRIP: pfnClipFunc = ClipLines; break;
case TOP_POINT_LIST: pfnClipFunc = ClipPoints; break;
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
index ed644c044c3..ab1d46de9d0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -481,6 +481,7 @@ struct PA_STATE_CUT : public PA_STATE
case TOP_LINE_LIST_ADJ: pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineListAdj : &PA_STATE_CUT::ProcessVertLineListAdjNoGs; break;
case TOP_LINE_STRIP: pfnPa = &PA_STATE_CUT::ProcessVertLineStrip; break;
case TOP_LISTSTRIP_ADJ: pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineStripAdj : &PA_STATE_CUT::ProcessVertLineStripAdjNoGs; break;
+ case TOP_RECT_LIST: pfnPa = &PA_STATE_CUT::ProcessVertRectList; break;
default: assert(0 && "Unimplemented topology");
}
}
@@ -719,6 +720,20 @@ struct PA_STATE_CUT : public PA_STATE
}
}
+ // compute the implied 4th vertex, v3
+ if (this->binTopology == TOP_RECT_LIST)
+ {
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ // v1, v3 = v1 + v2 - v0, v2
+ // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
+ simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
+ temp = _simd16_sub_ps(temp, verts[1].v[c]);
+ temp = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
+ verts[1].v[c] = _simd16_extract_ps(temp, 0);
+ }
+ }
+
return true;
}
@@ -766,6 +781,19 @@ struct PA_STATE_CUT : public PA_STATE
}
}
+ // compute the implied 4th vertex, v3
+ if (this->binTopology == TOP_RECT_LIST)
+ {
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ // v1, v3 = v1 + v2 - v0, v2
+ // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
+ simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
+ temp = _simd16_sub_ps(temp, verts[1].v[c]);
+ verts[1].v[c] = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
+ }
+ }
+
return true;
}
@@ -790,6 +818,21 @@ struct PA_STATE_CUT : public PA_STATE
offset += SIMD_WIDTH * sizeof(float);
}
}
+
+ // compute the implied 4th vertex, v3
+ if ((this->binTopology == TOP_RECT_LIST) && (triIndex % 2 == 1))
+ {
+ // v1, v3 = v1 + v2 - v0, v2
+ // v1 stored in tri[0], v0 stored in tri[1], v2 stored in tri[2]
+ float* pVert0 = (float*)&tri[1];
+ float* pVert1 = (float*)&tri[0];
+ float* pVert2 = (float*)&tri[2];
+ float* pVert3 = (float*)&tri[1];
+ for (uint32_t c = 0; c < 4; ++c)
+ {
+ pVert3[c] = pVert1[c] + pVert2[c] - pVert0[c];
+ }
+ }
}
uint32_t NumPrims()
@@ -1135,6 +1178,31 @@ struct PA_STATE_CUT : public PA_STATE
this->curIndex = 0;
}
}
+
+ void ProcessVertRectList(uint32_t index, bool finish) // Per-vertex handler installed as pfnPa for TOP_RECT_LIST; `finish` is not read in this body.
+ {
+ this->vert[this->curIndex] = index; // buffer the incoming vertex index (v0, v1, v2 in arrival order)
+ this->curIndex++;
+ if (this->curIndex == 3) // a rect is supplied as three vertices; each one is emitted as two triangle slots
+ {
+ // first triangle of the rectangle: gather indices in arrival order (v0, v1, v2)
+ this->indices[0][this->numPrimsAssembled] = this->vert[0];
+ this->indices[1][this->numPrimsAssembled] = this->vert[1];
+ this->indices[2][this->numPrimsAssembled] = this->vert[2];
+
+ // second triangle is (v1, v3, v2) with the implied vertex v3 = v1 + v2 - v0;
+ // slot 1 gathers v0 here and is overwritten with v3 by the TOP_RECT_LIST code in the assemble paths
+ this->indices[0][this->numPrimsAssembled+1] = this->vert[1];
+ this->indices[1][this->numPrimsAssembled+1] = this->vert[0];
+ this->indices[2][this->numPrimsAssembled+1] = this->vert[2];
+
+ // two primitive slots were filled for this one rect
+ this->numPrimsAssembled += 2;
+
+ // start collecting the next rect's three vertices
+ this->curIndex = 0;
+ }
+ }
};
// Primitive Assembly for data output from the DomainShader.
diff --git a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
index 5ee8dec6dab..3a7ee4c3f02 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
+++ b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
@@ -59,6 +59,7 @@ enum CORE_BUCKETS
FEClipPoints,
FEClipLines,
FEClipTriangles,
+ FEClipRectangles,
FECullZeroAreaAndBackface,
FECullBetweenCenters,
FEEarlyRastEnter,
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index f160913a652..c26dabe8383 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -746,7 +746,7 @@ struct SWR_GS_STATE
// Stride of incoming verts in attributes
uint32_t inputVertStride;
- // Output topology - can be point, tristrip, or linestrip
+ // Output topology - can be point, tristrip, linestrip, or rectlist
PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
// Maximum number of verts that can be emitted by a single instance of the GS