diff options
author | Krzysztof Raszkowski <[email protected]> | 2019-10-29 14:50:02 +0000 |
---|---|---|
committer | Jan Zielinski <[email protected]> | 2019-10-29 14:50:02 +0000 |
commit | 163d5fde06696fed2e69e000a7621087c1636749 (patch) | |
tree | 23c7c0c901996e06dab1b747247bf95e6a484322 /src/gallium/drivers | |
parent | 44971b84b70b35e260b09493b6f75304cd58965f (diff) |
gallium/swr: Enable GL_ARB_gpu_shader5: multiple streams
Added support for multiple vertex streams in geometry shaders (part of
the GL_ARB_gpu_shader5 extension).
Reviewed-by: Jan Zielinski <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 4 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_screen.cpp | 2 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_shader.cpp | 75 |
3 files changed, 70 insertions, 11 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 13e92e8640a..ab079ab4aa0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -702,8 +702,8 @@ void ProcessStreamIdBuffer(uint32_t stream, { SWR_ASSERT(stream < MAX_SO_STREAMS); - uint32_t numInputBytes = (numEmittedVerts * 2 + 7) / 8; - uint32_t numOutputBytes = std::max(numInputBytes / 2, 1U); + uint32_t numInputBytes = AlignUp(numEmittedVerts * 2, 8) / 8; + uint32_t numOutputBytes = AlignUp(numEmittedVerts, 8) / 8; for (uint32_t b = 0; b < numOutputBytes; ++b) { diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index 030b62a15ae..6c596a463b4 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -191,7 +191,7 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: return 1024; case PIPE_CAP_MAX_VERTEX_STREAMS: - return 1; + return 4; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index c8e34b8adb8..e5e5411fb10 100644 --- a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -251,7 +251,8 @@ struct BuilderSWR : public Builder { swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, struct lp_build_context * bld, LLVMValueRef (*outputs)[4], - LLVMValueRef emitted_vertices_vec); + LLVMValueRef emitted_vertices_vec, + LLVMValueRef stream_id); void swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base, @@ -306,13 +307,15 @@ static void swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, struct lp_build_context * bld, LLVMValueRef (*outputs)[4], - LLVMValueRef emitted_vertices_vec) + LLVMValueRef emitted_vertices_vec, 
+ LLVMValueRef stream_id) { swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld, outputs, - emitted_vertices_vec); + emitted_vertices_vec, + stream_id); } static void @@ -411,12 +414,12 @@ void BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, struct lp_build_context * bld, LLVMValueRef (*outputs)[4], - LLVMValueRef emitted_vertices_vec) + LLVMValueRef emitted_vertices_vec, + LLVMValueRef stream_id) { swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); - const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE; const uint32_t attribSize = 4 * sizeof(float); const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS; @@ -478,6 +481,49 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, } } + /* When the output type is not points, the geometry shader may not + * output data to multiple streams. So early exit here. + */ + if(iface->pGsState->outputTopology != TOP_POINT_LIST) { + STACKRESTORE(pStack); + return; + } + + // Info about stream id for each vertex + // is coded in 2 bits (4 vert per byte "box"): + // ----------------- ----------------- ---- + // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |... + // ----------------- ----------------- ---- + + // Calculate where need to put stream id for current vert + // in 1 byte "box". + Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2)); + + // Calculate in which box put stream id for current vert. 
+ Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2)); + + // Skip count header + Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE)); + + for (uint32_t lane = 0; lane < mVWidth; ++lane) { + Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty); + Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); + + Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane))); + + // Just make sure that not overflow max - stream id = (0,1,2,3) + Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty); + + // Shift it to correct position in byte "box" + vVal = SHL(vVal, pShift); + + // Info about other vertices can be already stored + // so we need to read and add bits from current vert info. + Value *storedValue = LOAD(pStreamOffset); + vVal = OR(storedValue, vVal); + STORE(vVal, pStreamOffset); + } + STACKRESTORE(pStack); } @@ -491,6 +537,15 @@ BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base, { swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; + /* When the output type is points, the geometry shader may output data + * to multiple streams, and end_primitive has no effect. Info about + * stream id for vertices is stored into the same place in memory where + * end primitive info is stored so early exit in this case. + */ + if (iface->pGsState->outputTopology == TOP_POINT_LIST) { + return; + } + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask }); @@ -569,9 +624,13 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key) pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS]; - // XXX: single stream for now... 
- pGS->isSingleStream = true; - pGS->singleStreamID = 0; + // If point primitive then assume to use multiple streams + if(pGS->outputTopology == TOP_POINT_LIST) { + pGS->isSingleStream = false; + } else { + pGS->isSingleStream = true; + pGS->singleStreamID = 0; + } pGS->vertexAttribOffset = VERTEX_POSITION_SLOT; pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset; |