diff options
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/binner.cpp | 4 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/clip.h | 40 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 10 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/state.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_shader.cpp | 2 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_state.cpp | 2 |
6 files changed, 49 insertions, 21 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 036d8b1e7da..19eef9bb15c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -80,12 +80,12 @@ INLINE void ProcessAttributes( if (IsSwizzledT::value) { SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i]; - inputSlot = VERTEX_ATTRIB_START_SLOT + attribSwizzle.sourceAttrib; + inputSlot = backendState.vertexAttribOffset + attribSwizzle.sourceAttrib; } else { - inputSlot = VERTEX_ATTRIB_START_SLOT + i; + inputSlot = backendState.vertexAttribOffset + i; } __m128 attrib[3]; // triangle attribs (always 4 wide) diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 12b52c5847e..4f940d931c4 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -489,7 +489,7 @@ public: // Compute absolute attrib slot in vertex array uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot; maxSlot = std::max<int32_t>(maxSlot, mapSlot); - uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot; + uint32_t inputSlot = backendState.vertexAttribOffset + mapSlot; pa.Assemble(inputSlot, tmpVector); @@ -625,10 +625,10 @@ public: } // transpose attribs - pBase = (uint8_t*)(&vertices[0].attrib[VERTEX_ATTRIB_START_SLOT]) + sizeof(float) * inputPrim; + pBase = (uint8_t*)(&vertices[0].attrib[backendState.vertexAttribOffset]) + sizeof(float) * inputPrim; for (uint32_t attrib = 0; attrib < numAttribs; ++attrib) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + attrib; + uint32_t attribSlot = backendState.vertexAttribOffset + attrib; for (uint32_t c = 0; c < 4; ++c) { #if USE_SIMD16_FRONTEND @@ -746,7 +746,7 @@ public: // Compute absolute attrib slot in vertex array uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot; maxSlot = std::max<int32_t>(maxSlot, mapSlot); - uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot; + uint32_t inputSlot = backendState.vertexAttribOffset + mapSlot; pa.Assemble_simd16(inputSlot, tmpVector); @@ -877,10 +877,10 @@ public: } // transpose attribs - pBase = (uint8_t*)(&vertices[0].attrib[VERTEX_ATTRIB_START_SLOT]) + sizeof(float) * inputPrim; + pBase = (uint8_t*)(&vertices[0].attrib[backendState.vertexAttribOffset]) + sizeof(float) * inputPrim; for (uint32_t attrib = 0; attrib < numAttribs; ++attrib) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + attrib; + uint32_t attribSlot = backendState.vertexAttribOffset + attrib; for (uint32_t c = 0; c < 4; ++c) { simdscalar temp = _simd_mask_i32gather_ps(_simd_setzero_ps(), (const float *)pBase, vOffsets, vMask, 1); @@ -1230,6 +1230,8 @@ private: uint32_t numInAttribs, // number of attributes per vertex. float *pOutVerts) // array of output positions. We'll write our new intersection point at i*4. { + uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset; + // compute interpolation factor simdscalar t; switch (ClippingPlane) @@ -1263,7 +1265,7 @@ private: // interpolate attributes and store for (uint32_t a = 0; a < numInAttribs; ++a) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a; + uint32_t attribSlot = vertexAttribOffset + a; for (uint32_t c = 0; c < 4; ++c) { simdscalar vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c); @@ -1312,6 +1314,8 @@ private: uint32_t numInAttribs, // number of attributes per vertex. float *pOutVerts) // array of output positions. We'll write our new intersection point at i*4. { + uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset; + // compute interpolation factor simd16scalar t; switch (ClippingPlane) @@ -1345,7 +1349,7 @@ private: // interpolate attributes and store for (uint32_t a = 0; a < numInAttribs; ++a) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a; + uint32_t attribSlot = vertexAttribOffset + a; for (uint32_t c = 0; c < 4; ++c) { simd16scalar vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c); @@ -1421,6 +1425,8 @@ private: template<SWR_CLIPCODES ClippingPlane> simdscalari ClipTriToPlane(const float* pInVerts, const simdscalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts) { + uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset; + simdscalari vCurIndex = _simd_setzero_si(); simdscalari vOutIndex = _simd_setzero_si(); simdscalar vActiveMask = _simd_castsi_ps(_simd_cmplt_epi32(vCurIndex, vNumInPts)); @@ -1461,7 +1467,7 @@ private: // store attribs for (uint32_t a = 0; a < numInAttribs; ++a) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a; + uint32_t attribSlot = vertexAttribOffset + a; for (uint32_t c = 0; c < 4; ++c) { simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c); @@ -1515,6 +1521,8 @@ private: template<SWR_CLIPCODES ClippingPlane> simd16scalari ClipTriToPlane(const float* pInVerts, const simd16scalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts) { + uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset; + simd16scalari vCurIndex = _simd16_setzero_si(); simd16scalari vOutIndex = _simd16_setzero_si(); simd16scalar vActiveMask = _simd16_castsi_ps(_simd16_cmplt_epi32(vCurIndex, vNumInPts)); @@ -1555,7 +1563,7 @@ private: // store attribs for (uint32_t a = 0; a < numInAttribs; ++a) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a; + uint32_t attribSlot = vertexAttribOffset + a; for (uint32_t c = 0; c < 4; ++c) { simd16scalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c); @@ -1609,6 +1617,8 @@ private: template<SWR_CLIPCODES ClippingPlane> simdscalari ClipLineToPlane(const float* pInVerts, const simdscalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts) { + uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset; + simdscalari vCurIndex = _simd_setzero_si(); simdscalari vOutIndex = _simd_setzero_si(); simdscalar vActiveMask = _simd_castsi_ps(_simd_cmplt_epi32(vCurIndex, vNumInPts)); @@ -1646,7 +1656,7 @@ private: // interpolate attributes and store for (uint32_t a = 0; a < numInAttribs; ++a) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a; + uint32_t attribSlot = vertexAttribOffset + a; for (uint32_t c = 0; c < 4; ++c) { simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c); @@ -1679,7 +1689,7 @@ private: // interpolate attributes and store for (uint32_t a = 0; a < numInAttribs; ++a) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a; + uint32_t attribSlot = vertexAttribOffset + a; for (uint32_t c = 0; c < 4; ++c) { simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, p_in, p, c); @@ -1699,6 +1709,8 @@ private: template<SWR_CLIPCODES ClippingPlane> simd16scalari ClipLineToPlane(const float* pInVerts, const simd16scalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts) { + uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset; + simd16scalari vCurIndex = _simd16_setzero_si(); simd16scalari vOutIndex = _simd16_setzero_si(); simd16scalar vActiveMask = _simd16_castsi_ps(_simd16_cmplt_epi32(vCurIndex, vNumInPts)); @@ -1736,7 +1748,7 @@ private: // interpolate attributes and store for (uint32_t a = 0; a < numInAttribs; ++a) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a; + uint32_t attribSlot = vertexAttribOffset + a; for (uint32_t c = 0; c < 4; ++c) { simd16scalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c); @@ -1769,7 +1781,7 @@ private: // interpolate attributes and store for (uint32_t a = 0; a < numInAttribs; ++a) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a; + uint32_t attribSlot = vertexAttribOffset + a; for (uint32_t c = 0; c < 4; ++c) { simd16scalar vAttrib = GatherComponent(pInVerts, attribSlot, p_in, p, c); diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 1cd166d83ff..9e2f35725c5 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -528,7 +528,7 @@ static void StreamOut( while (_BitScanForward(&slot, soMask)) { __m128 attrib[MAX_NUM_VERTS_PER_PRIM]; // prim attribs (always 4 wide) - uint32_t paSlot = slot + VERTEX_ATTRIB_START_SLOT; + uint32_t paSlot = slot + soState.vertexAttribOffset[streamIndex]; pa.AssembleSingle(paSlot, primIndex, attrib); // Attribute offset is relative offset from start of vertex. @@ -792,12 +792,12 @@ static void GeometryShaderStage( // assemble all attributes for the input primitive for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + slot; + uint32_t attribSlot = pState->vertexAttribOffset + slot; pa.Assemble(attribSlot, attrib); for (uint32_t i = 0; i < numVertsPerPrim; ++i) { - tlsGsContext.vert[i].attrib[attribSlot] = attrib[i]; + tlsGsContext.vert[i].attrib[VERTEX_ATTRIB_START_SLOT + slot] = attrib[i]; } } @@ -1131,12 +1131,12 @@ static void TessellationStages( // assemble all attributes for the input primitives for (uint32_t slot = 0; slot < tsState.numHsInputAttribs; ++slot) { - uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + slot; + uint32_t attribSlot = tsState.vertexAttribOffset + slot; pa.Assemble(attribSlot, simdattrib); for (uint32_t i = 0; i < numVertsPerPrim; ++i) { - hsContext.vert[i].attrib[attribSlot] = simdattrib[i]; + hsContext.vert[i].attrib[VERTEX_ATTRIB_START_SLOT + slot] = simdattrib[i]; } } diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 2440d445728..4fbd74ddc4a 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -673,6 +673,9 @@ struct SWR_STREAMOUT_STATE // Number of attributes, including position, per vertex that are streamed out. // This should match number of bits in stream mask. uint32_t streamNumEntries[MAX_SO_STREAMS]; + + // Offset to the start of the attributes of the input vertices, in simdvector units + uint32_t vertexAttribOffset[MAX_SO_STREAMS]; }; ////////////////////////////////////////////////////////////////////////// @@ -718,6 +721,9 @@ struct SWR_GS_STATE // when single stream is enabled, singleStreamID dictates which stream is being output. // field ignored if isSingleStream is false uint32_t singleStreamID; + + // Offset to the start of the attributes of the input vertices, in simdvector units + uint32_t vertexAttribOffset; }; @@ -773,6 +779,9 @@ struct SWR_TS_STATE uint32_t numHsInputAttribs; uint32_t numHsOutputAttribs; uint32_t numDsOutputAttribs; + + // Offset to the start of the attributes of the input vertices, in simdvector units + uint32_t vertexAttribOffset; }; // output merger state @@ -1047,6 +1056,9 @@ struct SWR_BACKEND_STATE bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning + + // Offset to the start of the attributes of the input vertices, in simdvector units + uint32_t vertexAttribOffset; }; diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index dfc54fa7125..83b49c47635 100644 --- a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -551,6 +551,8 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key) pGS->isSingleStream = true; pGS->singleStreamID = 0; + pGS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize + struct swr_geometry_shader *gs = ctx->gs; LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 7a8786d96f4..897ce998fff 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -351,6 +351,7 @@ swr_create_vs_state(struct pipe_context *pipe, for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) { swr_vs->soState.streamNumEntries[i] = _mm_popcnt_u32(swr_vs->soState.streamMasks[i]); + swr_vs->soState.vertexAttribOffset[i] = VERTEX_ATTRIB_START_SLOT; // TODO: optimize } } @@ -1747,6 +1748,7 @@ swr_update_derived(struct pipe_context *pipe, &ctx->vs->info.base; backendState.readRenderTargetArrayIndex = pLastFE->writes_layer; backendState.readViewportArrayIndex = pLastFE->writes_viewport_index; + backendState.vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize SwrSetBackendState(ctx->swrContext, &backendState); |