aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tim Rowley <[email protected]> 2017-05-08 12:45:20 -0500
committer Tim Rowley <[email protected]> 2017-05-30 17:20:56 -0500
commit f64aea0959af955841bbde96885aebacb44b4aaf (patch)
tree 9ca4e23f2227149091afcc75246d4efe257f5a07
parent cbd33e71f73842ef80bcd32e9c0e26a4989a532c (diff)
swr/rast: SIMD16 FE - interleaved simdvertex output in GS
Eliminates conversion copies on GS output from simdvertex to simd16vertex.

Reviewed-by: Bruce Cherniak <[email protected]>
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.cpp 22
-rw-r--r-- src/gallium/drivers/swr/swr_shader.cpp 29
2 files changed, 31 insertions, 20 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 3886c64ccf6..e88246f478f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -717,10 +717,6 @@ void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t num
THREAD SWR_GS_CONTEXT tlsGsContext;
-#if USE_SIMD16_FRONTEND
-THREAD simd16vertex tempVertex_simd16[128];
-
-#endif
template<typename SIMDVERTEX, uint32_t SIMD_WIDTH>
struct GsBufferInfo
{
@@ -819,7 +815,11 @@ static void GeometryShaderStage(
tlsGsContext.vert[i].attrib[VERTEX_POSITION_SLOT] = attrib[i];
}
+#if USE_SIMD16_FRONTEND
+ const GsBufferInfo<simd16vertex, KNOB_SIMD16_WIDTH> bufferInfo(state.gsState);
+#else
const GsBufferInfo<simdvertex, KNOB_SIMD_WIDTH> bufferInfo(state.gsState);
+#endif
// record valid prims from the frontend to avoid over binning the newly generated
// prims from the GS
@@ -923,19 +923,7 @@ static void GeometryShaderStage(
}
#if USE_SIMD16_FRONTEND
- // TEMPORARY: GS outputs simdvertex, PA inputs simd16vertex, so convert simdvertex to simd16vertex
-
- SWR_ASSERT(numEmittedVerts <= 256);
-
- PackPairsOfSimdVertexIntoSimd16Vertex(
- tempVertex_simd16,
- reinterpret_cast<const simdvertex *>(pBase),
- numEmittedVerts,
- SWR_VTX_NUM_SLOTS);
-
-#endif
-#if USE_SIMD16_FRONTEND
- PA_STATE_CUT gsPa(pDC, reinterpret_cast<uint8_t *>(tempVertex_simd16), numEmittedVerts, reinterpret_cast<simd16mask *>(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
+ PA_STATE_CUT gsPa(pDC, pBase, numEmittedVerts, reinterpret_cast<simd16mask *>(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
#else
PA_STATE_CUT gsPa(pDC, pBase, numEmittedVerts, pCutBuffer, numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts);
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index d55820eb754..2f495f59c23 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -370,8 +370,13 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+#if USE_SIMD16_FRONTEND
+ const uint32_t simdVertexStride = sizeof(simdvertex) * 2;
+ const uint32_t numSimdBatches = (pGS->maxNumVerts + (mVWidth * 2) - 1) / (mVWidth * 2);
+#else
const uint32_t simdVertexStride = sizeof(simdvertex);
- const uint32_t numSimdBatches = (pGS->maxNumVerts + 7) / 8;
+ const uint32_t numSimdBatches = (pGS->maxNumVerts + mVWidth - 1) / mVWidth;
+#endif
const uint32_t inputPrimStride = numSimdBatches * simdVertexStride;
Value *pStream = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_pStream });
@@ -388,8 +393,14 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
inputPrimStride * 6,
inputPrimStride * 7 } );
- Value *vVertexSlot = ASHR(unwrap(emitted_vertices_vec), 3);
- Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), 7);
+#if USE_SIMD16_FRONTEND
+ const uint32_t simdShift = log2(mVWidth * 2);
+ Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), (mVWidth * 2) - 1);
+#else
+ const uint32_t simdShift = log2(mVWidth);
+ Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), mVWidth - 1);
+#endif
+ Value *vVertexSlot = ASHR(unwrap(emitted_vertices_vec), simdShift);
for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
uint32_t attribSlot = attrib;
@@ -400,10 +411,17 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER)
attribSlot = VERTEX_RTAI_SLOT;
+#if USE_SIMD16_FRONTEND
+ Value *vOffsetsAttrib =
+ ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex) * 2)));
+ vOffsetsAttrib =
+ ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector) * 2)));
+#else
Value *vOffsetsAttrib =
ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex))));
vOffsetsAttrib =
ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector))));
+#endif
vOffsetsAttrib =
ADD(vOffsetsAttrib, MUL(vSimdSlot, VIMMED1((uint32_t)sizeof(float))));
@@ -416,8 +434,13 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
MASKED_SCATTER(vData, vPtrs, 32, vMask1);
+#if USE_SIMD16_FRONTEND
+ vOffsetsAttrib =
+ ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar) * 2));
+#else
vOffsetsAttrib =
ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar)));
+#endif
}
}
}