about summary refs log tree commit diff stats
path: root/src/gallium/drivers/swr
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/swr')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/binner.cpp 4
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.h 40
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.cpp 10
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/state.h 12
-rw-r--r-- src/gallium/drivers/swr/swr_shader.cpp 2
-rw-r--r-- src/gallium/drivers/swr/swr_state.cpp 2
6 files changed, 49 insertions, 21 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index 036d8b1e7da..19eef9bb15c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -80,12 +80,12 @@ INLINE void ProcessAttributes(
if (IsSwizzledT::value)
{
SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i];
- inputSlot = VERTEX_ATTRIB_START_SLOT + attribSwizzle.sourceAttrib;
+ inputSlot = backendState.vertexAttribOffset + attribSwizzle.sourceAttrib;
}
else
{
- inputSlot = VERTEX_ATTRIB_START_SLOT + i;
+ inputSlot = backendState.vertexAttribOffset + i;
}
__m128 attrib[3]; // triangle attribs (always 4 wide)
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 12b52c5847e..4f940d931c4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -489,7 +489,7 @@ public:
// Compute absolute attrib slot in vertex array
uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
maxSlot = std::max<int32_t>(maxSlot, mapSlot);
- uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot;
+ uint32_t inputSlot = backendState.vertexAttribOffset + mapSlot;
pa.Assemble(inputSlot, tmpVector);
@@ -625,10 +625,10 @@ public:
}
// transpose attribs
- pBase = (uint8_t*)(&vertices[0].attrib[VERTEX_ATTRIB_START_SLOT]) + sizeof(float) * inputPrim;
+ pBase = (uint8_t*)(&vertices[0].attrib[backendState.vertexAttribOffset]) + sizeof(float) * inputPrim;
for (uint32_t attrib = 0; attrib < numAttribs; ++attrib)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
+ uint32_t attribSlot = backendState.vertexAttribOffset + attrib;
for (uint32_t c = 0; c < 4; ++c)
{
#if USE_SIMD16_FRONTEND
@@ -746,7 +746,7 @@ public:
// Compute absolute attrib slot in vertex array
uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
maxSlot = std::max<int32_t>(maxSlot, mapSlot);
- uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot;
+ uint32_t inputSlot = backendState.vertexAttribOffset + mapSlot;
pa.Assemble_simd16(inputSlot, tmpVector);
@@ -877,10 +877,10 @@ public:
}
// transpose attribs
- pBase = (uint8_t*)(&vertices[0].attrib[VERTEX_ATTRIB_START_SLOT]) + sizeof(float) * inputPrim;
+ pBase = (uint8_t*)(&vertices[0].attrib[backendState.vertexAttribOffset]) + sizeof(float) * inputPrim;
for (uint32_t attrib = 0; attrib < numAttribs; ++attrib)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
+ uint32_t attribSlot = backendState.vertexAttribOffset + attrib;
for (uint32_t c = 0; c < 4; ++c)
{
simdscalar temp = _simd_mask_i32gather_ps(_simd_setzero_ps(), (const float *)pBase, vOffsets, vMask, 1);
@@ -1230,6 +1230,8 @@ private:
uint32_t numInAttribs, // number of attributes per vertex.
float *pOutVerts) // array of output positions. We'll write our new intersection point at i*4.
{
+ uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
+
// compute interpolation factor
simdscalar t;
switch (ClippingPlane)
@@ -1263,7 +1265,7 @@ private:
// interpolate attributes and store
for (uint32_t a = 0; a < numInAttribs; ++a)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
+ uint32_t attribSlot = vertexAttribOffset + a;
for (uint32_t c = 0; c < 4; ++c)
{
simdscalar vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
@@ -1312,6 +1314,8 @@ private:
uint32_t numInAttribs, // number of attributes per vertex.
float *pOutVerts) // array of output positions. We'll write our new intersection point at i*4.
{
+ uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
+
// compute interpolation factor
simd16scalar t;
switch (ClippingPlane)
@@ -1345,7 +1349,7 @@ private:
// interpolate attributes and store
for (uint32_t a = 0; a < numInAttribs; ++a)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
+ uint32_t attribSlot = vertexAttribOffset + a;
for (uint32_t c = 0; c < 4; ++c)
{
simd16scalar vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
@@ -1421,6 +1425,8 @@ private:
template<SWR_CLIPCODES ClippingPlane>
simdscalari ClipTriToPlane(const float* pInVerts, const simdscalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts)
{
+ uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
+
simdscalari vCurIndex = _simd_setzero_si();
simdscalari vOutIndex = _simd_setzero_si();
simdscalar vActiveMask = _simd_castsi_ps(_simd_cmplt_epi32(vCurIndex, vNumInPts));
@@ -1461,7 +1467,7 @@ private:
// store attribs
for (uint32_t a = 0; a < numInAttribs; ++a)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
+ uint32_t attribSlot = vertexAttribOffset + a;
for (uint32_t c = 0; c < 4; ++c)
{
simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
@@ -1515,6 +1521,8 @@ private:
template<SWR_CLIPCODES ClippingPlane>
simd16scalari ClipTriToPlane(const float* pInVerts, const simd16scalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts)
{
+ uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
+
simd16scalari vCurIndex = _simd16_setzero_si();
simd16scalari vOutIndex = _simd16_setzero_si();
simd16scalar vActiveMask = _simd16_castsi_ps(_simd16_cmplt_epi32(vCurIndex, vNumInPts));
@@ -1555,7 +1563,7 @@ private:
// store attribs
for (uint32_t a = 0; a < numInAttribs; ++a)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
+ uint32_t attribSlot = vertexAttribOffset + a;
for (uint32_t c = 0; c < 4; ++c)
{
simd16scalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
@@ -1609,6 +1617,8 @@ private:
template<SWR_CLIPCODES ClippingPlane>
simdscalari ClipLineToPlane(const float* pInVerts, const simdscalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts)
{
+ uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
+
simdscalari vCurIndex = _simd_setzero_si();
simdscalari vOutIndex = _simd_setzero_si();
simdscalar vActiveMask = _simd_castsi_ps(_simd_cmplt_epi32(vCurIndex, vNumInPts));
@@ -1646,7 +1656,7 @@ private:
// interpolate attributes and store
for (uint32_t a = 0; a < numInAttribs; ++a)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
+ uint32_t attribSlot = vertexAttribOffset + a;
for (uint32_t c = 0; c < 4; ++c)
{
simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
@@ -1679,7 +1689,7 @@ private:
// interpolate attributes and store
for (uint32_t a = 0; a < numInAttribs; ++a)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
+ uint32_t attribSlot = vertexAttribOffset + a;
for (uint32_t c = 0; c < 4; ++c)
{
simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, p_in, p, c);
@@ -1699,6 +1709,8 @@ private:
template<SWR_CLIPCODES ClippingPlane>
simd16scalari ClipLineToPlane(const float* pInVerts, const simd16scalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts)
{
+ uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
+
simd16scalari vCurIndex = _simd16_setzero_si();
simd16scalari vOutIndex = _simd16_setzero_si();
simd16scalar vActiveMask = _simd16_castsi_ps(_simd16_cmplt_epi32(vCurIndex, vNumInPts));
@@ -1736,7 +1748,7 @@ private:
// interpolate attributes and store
for (uint32_t a = 0; a < numInAttribs; ++a)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
+ uint32_t attribSlot = vertexAttribOffset + a;
for (uint32_t c = 0; c < 4; ++c)
{
simd16scalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
@@ -1769,7 +1781,7 @@ private:
// interpolate attributes and store
for (uint32_t a = 0; a < numInAttribs; ++a)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
+ uint32_t attribSlot = vertexAttribOffset + a;
for (uint32_t c = 0; c < 4; ++c)
{
simd16scalar vAttrib = GatherComponent(pInVerts, attribSlot, p_in, p, c);
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 1cd166d83ff..9e2f35725c5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -528,7 +528,7 @@ static void StreamOut(
while (_BitScanForward(&slot, soMask))
{
__m128 attrib[MAX_NUM_VERTS_PER_PRIM]; // prim attribs (always 4 wide)
- uint32_t paSlot = slot + VERTEX_ATTRIB_START_SLOT;
+ uint32_t paSlot = slot + soState.vertexAttribOffset[streamIndex];
pa.AssembleSingle(paSlot, primIndex, attrib);
// Attribute offset is relative offset from start of vertex.
@@ -792,12 +792,12 @@ static void GeometryShaderStage(
// assemble all attributes for the input primitive
for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + slot;
+ uint32_t attribSlot = pState->vertexAttribOffset + slot;
pa.Assemble(attribSlot, attrib);
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
{
- tlsGsContext.vert[i].attrib[attribSlot] = attrib[i];
+ tlsGsContext.vert[i].attrib[VERTEX_ATTRIB_START_SLOT + slot] = attrib[i];
}
}
@@ -1131,12 +1131,12 @@ static void TessellationStages(
// assemble all attributes for the input primitives
for (uint32_t slot = 0; slot < tsState.numHsInputAttribs; ++slot)
{
- uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + slot;
+ uint32_t attribSlot = tsState.vertexAttribOffset + slot;
pa.Assemble(attribSlot, simdattrib);
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
{
- hsContext.vert[i].attrib[attribSlot] = simdattrib[i];
+ hsContext.vert[i].attrib[VERTEX_ATTRIB_START_SLOT + slot] = simdattrib[i];
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 2440d445728..4fbd74ddc4a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -673,6 +673,9 @@ struct SWR_STREAMOUT_STATE
// Number of attributes, including position, per vertex that are streamed out.
// This should match number of bits in stream mask.
uint32_t streamNumEntries[MAX_SO_STREAMS];
+
+ // Offset to the start of the attributes of the input vertices, in simdvector units
+ uint32_t vertexAttribOffset[MAX_SO_STREAMS];
};
//////////////////////////////////////////////////////////////////////////
@@ -718,6 +721,9 @@ struct SWR_GS_STATE
// when single stream is enabled, singleStreamID dictates which stream is being output.
// field ignored if isSingleStream is false
uint32_t singleStreamID;
+
+ // Offset to the start of the attributes of the input vertices, in simdvector units
+ uint32_t vertexAttribOffset;
};
@@ -773,6 +779,9 @@ struct SWR_TS_STATE
uint32_t numHsInputAttribs;
uint32_t numHsOutputAttribs;
uint32_t numDsOutputAttribs;
+
+ // Offset to the start of the attributes of the input vertices, in simdvector units
+ uint32_t vertexAttribOffset;
};
// output merger state
@@ -1047,6 +1056,9 @@ struct SWR_BACKEND_STATE
bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
+
+ // Offset to the start of the attributes of the input vertices, in simdvector units
+ uint32_t vertexAttribOffset;
};
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index dfc54fa7125..83b49c47635 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -551,6 +551,8 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
pGS->isSingleStream = true;
pGS->singleStreamID = 0;
+ pGS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
+
struct swr_geometry_shader *gs = ctx->gs;
LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 7a8786d96f4..897ce998fff 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -351,6 +351,7 @@ swr_create_vs_state(struct pipe_context *pipe,
for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
swr_vs->soState.streamNumEntries[i] =
_mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
+ swr_vs->soState.vertexAttribOffset[i] = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
}
}
@@ -1747,6 +1748,7 @@ swr_update_derived(struct pipe_context *pipe,
&ctx->vs->info.base;
backendState.readRenderTargetArrayIndex = pLastFE->writes_layer;
backendState.readViewportArrayIndex = pLastFE->writes_viewport_index;
+ backendState.vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
SwrSetBackendState(ctx->swrContext, &backendState);