summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/api.cpp 56
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/api.h 13
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/clip.h 9
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/context.h 11
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/frontend.cpp 222
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/pa.h 11
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/state.h 29
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h 12
-rw-r--r-- src/gallium/drivers/swr/swr_shader.cpp 12
-rw-r--r-- src/gallium/drivers/swr/swr_state.cpp 13
-rw-r--r-- src/gallium/drivers/swr/swr_state.h 1
11 files changed, 218 insertions, 171 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index aface7a77ef..c3a1539b506 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -606,31 +606,6 @@ void SwrSetBlendFunc(
pState->pfnBlendFunc[renderTarget] = pfnBlendFunc;
}
-void SwrSetLinkage(
- HANDLE hContext,
- uint32_t mask,
- const uint8_t* pMap)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- static const uint8_t IDENTITY_MAP[] =
- {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- };
- static_assert(sizeof(IDENTITY_MAP) == sizeof(pState->linkageMap),
- "Update for new value of MAX_ATTRIBUTES");
-
- pState->linkageMask = mask;
- pState->linkageCount = _mm_popcnt_u32(mask);
-
- if (!pMap)
- {
- pMap = IDENTITY_MAP;
- }
- memcpy(pState->linkageMap, pMap, pState->linkageCount);
-}
-
// update guardband multipliers for the viewport
void updateGuardband(API_STATE *pState)
{
@@ -847,25 +822,44 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
(pState->state.depthStencilState.depthWriteEnable == FALSE) &&
(pState->state.depthStencilState.stencilTestEnable == FALSE) &&
(pState->state.depthStencilState.stencilWriteEnable == FALSE) &&
- (pState->state.linkageCount == 0))
+ (pState->state.backendState.numAttributes == 0))
{
pState->pfnProcessPrims = nullptr;
- pState->state.linkageMask = 0;
}
if (pState->state.soState.rasterizerDisable == true)
{
pState->pfnProcessPrims = nullptr;
- pState->state.linkageMask = 0;
}
- // set up the frontend attrib mask
- pState->state.feAttribMask = pState->state.linkageMask;
+ // set up the frontend attribute count
+ pState->state.feNumAttributes = 0;
+ const SWR_BACKEND_STATE& backendState = pState->state.backendState;
+ if (backendState.swizzleEnable)
+ {
+ // attribute swizzling is enabled, iterate over the map and record the max attribute used
+ for (uint32_t i = 0; i < backendState.numAttributes; ++i)
+ {
+ pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1);
+ }
+ }
+ else
+ {
+ pState->state.feNumAttributes = pState->state.backendState.numAttributes;
+ }
+
if (pState->state.soState.soEnable)
{
+ uint32_t streamMasks = 0;
for (uint32_t i = 0; i < 4; ++i)
{
- pState->state.feAttribMask |= pState->state.soState.streamMasks[i];
+ streamMasks |= pState->state.soState.streamMasks[i];
+ }
+
+ DWORD maxAttrib;
+ if (_BitScanReverse(&maxAttrib, streamMasks))
+ {
+ pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
}
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index 04cdb9e4e65..ab56cab772e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -330,19 +330,6 @@ void SWR_API SwrSetBlendFunc(
PFN_BLEND_JIT_FUNC pfnBlendFunc);
//////////////////////////////////////////////////////////////////////////
-/// @brief Set linkage mask
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param mask - Specifies which vertex outputs are are needed by PS.
-/// @param pMap - (Optional)Linkage map to specify where FE attributes are
-/// gathered from to supply PS attribute values. The length
-/// of the map buffer needs to match the number of set bits
-/// in "mask".
-void SWR_API SwrSetLinkage(
- HANDLE hContext,
- uint32_t mask,
- const uint8_t* pMap);
-
-//////////////////////////////////////////////////////////////////////////
/// @brief SwrDraw
/// @param hContext - Handle passed back from SwrCreateContext
/// @param topology - Specifies topology for draw.
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 1a6fc6d2873..b2b3bb4e6fd 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -324,16 +324,13 @@ public:
}
// assemble attribs
- DWORD slot = 0;
- uint32_t mapIdx = 0;
- uint32_t tmpLinkage = this->state.linkageMask;
+ const SWR_BACKEND_STATE& backendState = this->state.backendState;
int32_t maxSlot = -1;
- while (_BitScanForward(&slot, tmpLinkage))
+ for (uint32_t slot = 0; slot < backendState.numAttributes; ++slot)
{
- tmpLinkage &= ~(1 << slot);
// Compute absolute attrib slot in vertex array
- uint32_t mapSlot = this->state.linkageMap[mapIdx++];
+ uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
maxSlot = std::max<int32_t>(maxSlot, mapSlot);
uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot;
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index be4c2e94b42..13dcdfca2ee 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -264,15 +264,8 @@ OSALIGNLINE(struct) API_STATE
PFN_DS_FUNC pfnDsFunc;
SWR_TS_STATE tsState;
- // Specifies which VS outputs are sent to PS.
- // Does not include position
- uint32_t linkageMask;
- uint32_t linkageCount;
- uint8_t linkageMap[MAX_ATTRIBUTES];
-
- // attrib mask, specifies the total set of attributes used
- // by the frontend (vs, so, gs)
- uint32_t feAttribMask;
+ // Number of attributes used by the frontend (vs, so, gs)
+ uint32_t feNumAttributes;
PRIMITIVE_TOPOLOGY topology;
bool forceFront;
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index cc8ebda35bc..8537c59033c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -794,15 +794,7 @@ static void GeometryShaderStage(
uint8_t* pBase = pInstanceBase + instance * instanceStride;
uint8_t* pCutBase = pCutBufferBase + instance * cutInstanceStride;
- DWORD numAttribs;
- if (_BitScanReverse(&numAttribs, state.feAttribMask))
- {
- numAttribs++;
- }
- else
- {
- numAttribs = 0;
- }
+ uint32_t numAttribs = state.feNumAttributes;
for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
{
@@ -1445,7 +1437,6 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc(
return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization);
}
-
//////////////////////////////////////////////////////////////////////////
/// @brief Processes attributes for the backend based on linkage mask and
/// linkage map. Essentially just doing an SOA->AOS conversion and pack.
@@ -1455,75 +1446,101 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc(
/// @param pLinkageMap - maps VS attribute slot to PS slot
/// @param triIndex - Triangle to process attributes for
/// @param pBuffer - Output result
-template<uint32_t NumVerts>
+template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT>
INLINE void ProcessAttributes(
DRAW_CONTEXT *pDC,
PA_STATE&pa,
- uint32_t linkageMask,
- const uint8_t* pLinkageMap,
uint32_t triIndex,
+ uint32_t primId,
float *pBuffer)
{
- DWORD slot = 0;
- uint32_t mapIdx = 0;
- LONG constantInterpMask = pDC->pState->state.backendState.constantInterpolationMask;
+ static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT");
+ const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
+ LONG constantInterpMask = backendState.constantInterpolationMask;
const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex;
const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology;
- while (_BitScanForward(&slot, linkageMask))
+ static const float constTable[3][4] = {
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 1.0f},
+ {1.0f, 1.0f, 1.0f, 1.0f}
+ };
+
+ for (uint32_t i = 0; i < backendState.numAttributes; ++i)
{
- linkageMask &= ~(1 << slot); // done with this bit.
+ uint32_t inputSlot;
+ if (IsSwizzledT::value)
+ {
+ SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i];
+ inputSlot = VERTEX_ATTRIB_START_SLOT + attribSwizzle.sourceAttrib;
- // compute absolute slot in vertex attrib array
- uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + pLinkageMap[mapIdx];
+ }
+ else
+ {
+ inputSlot = VERTEX_ATTRIB_START_SLOT + i;
+ }
__m128 attrib[3]; // triangle attribs (always 4 wide)
+ static const uint32_t numVerts = NumVertsT::value < 3 ? NumVertsT::value : 3;
+ float* pAttribStart = pBuffer;
- if (_bittest(&constantInterpMask, mapIdx))
+ if (HasConstantInterpT::value)
{
- uint32_t vid;
- static const uint32_t tristripProvokingVertex[] = {0, 2, 1};
- static const int32_t quadProvokingTri[2][4] = {{0, 0, 0, 1}, {0, -1, 0, 0}};
- static const uint32_t quadProvokingVertex[2][4] = {{0, 1, 2, 2}, {0, 1, 1, 2}};
- static const int32_t qstripProvokingTri[2][4] = {{0, 0, 0, 1}, {-1, 0, 0, 0}};
- static const uint32_t qstripProvokingVertex[2][4] = {{0, 1, 2, 1}, {0, 0, 2, 1}};
-
- switch (topo) {
- case TOP_QUAD_LIST:
- pa.AssembleSingle(inputSlot,
- triIndex + quadProvokingTri[triIndex & 1][provokingVertex],
- attrib);
- vid = quadProvokingVertex[triIndex & 1][provokingVertex];
- break;
- case TOP_QUAD_STRIP:
- pa.AssembleSingle(inputSlot,
- triIndex + qstripProvokingTri[triIndex & 1][provokingVertex],
- attrib);
- vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
- break;
- case TOP_TRIANGLE_STRIP:
- pa.AssembleSingle(inputSlot, triIndex, attrib);
- vid = (triIndex & 1)
- ? tristripProvokingVertex[provokingVertex]
- : provokingVertex;
- break;
- default:
- pa.AssembleSingle(inputSlot, triIndex, attrib);
- vid = provokingVertex;
- break;
- }
+ if (_bittest(&constantInterpMask, i))
+ {
+ uint32_t vid;
+ uint32_t adjustedTriIndex;
+ static const uint32_t tristripProvokingVertex[] = { 0, 2, 1 };
+ static const int32_t quadProvokingTri[2][4] = { {0, 0, 0, 1}, {0, -1, 0, 0} };
+ static const uint32_t quadProvokingVertex[2][4] = { {0, 1, 2, 2}, {0, 1, 1, 2} };
+ static const int32_t qstripProvokingTri[2][4] = { {0, 0, 0, 1}, {-1, 0, 0, 0} };
+ static const uint32_t qstripProvokingVertex[2][4] = { {0, 1, 2, 1}, {0, 0, 2, 1} };
+
+ switch (topo) {
+ case TOP_QUAD_LIST:
+ adjustedTriIndex = triIndex + quadProvokingTri[triIndex & 1][provokingVertex];
+ vid = quadProvokingVertex[triIndex & 1][provokingVertex];
+ break;
+ case TOP_QUAD_STRIP:
+ adjustedTriIndex = triIndex + qstripProvokingTri[triIndex & 1][provokingVertex];
+ vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
+ break;
+ case TOP_TRIANGLE_STRIP:
+ adjustedTriIndex = triIndex;
+ vid = (triIndex & 1)
+ ? tristripProvokingVertex[provokingVertex]
+ : provokingVertex;
+ break;
+ default:
+ adjustedTriIndex = triIndex;
+ vid = provokingVertex;
+ break;
+ }
+
+ pa.AssembleSingle(inputSlot, adjustedTriIndex, attrib);
- for (uint32_t i = 0; i < NumVerts; ++i)
+ for (uint32_t i = 0; i < numVerts; ++i)
+ {
+ _mm_store_ps(pBuffer, attrib[vid]);
+ pBuffer += 4;
+ }
+ }
+ else
{
- _mm_store_ps(pBuffer, attrib[vid]);
- pBuffer += 4;
+ pa.AssembleSingle(inputSlot, triIndex, attrib);
+
+ for (uint32_t i = 0; i < numVerts; ++i)
+ {
+ _mm_store_ps(pBuffer, attrib[i]);
+ pBuffer += 4;
+ }
}
}
else
{
pa.AssembleSingle(inputSlot, triIndex, attrib);
- for (uint32_t i = 0; i < NumVerts; ++i)
+ for (uint32_t i = 0; i < numVerts; ++i)
{
_mm_store_ps(pBuffer, attrib[i]);
pBuffer += 4;
@@ -1534,16 +1551,66 @@ INLINE void ProcessAttributes(
// interpolation code in the pixel shader works correctly for the
// 3 topologies - point, line, tri. This effectively zeros out the
// effect of the missing vertices in the triangle interpolation.
- for (uint32_t i = NumVerts; i < 3; ++i)
+ for (uint32_t v = numVerts; v < 3; ++v)
{
- _mm_store_ps(pBuffer, attrib[NumVerts - 1]);
+ _mm_store_ps(pBuffer, attrib[numVerts - 1]);
pBuffer += 4;
}
- mapIdx++;
+ // check for constant source overrides
+ if (IsSwizzledT::value)
+ {
+ uint32_t mask = backendState.swizzleMap[i].componentOverrideMask;
+ if (mask)
+ {
+ DWORD comp;
+ while (_BitScanForward(&comp, mask))
+ {
+ mask &= ~(1 << comp);
+
+ float constantValue = 0.0f;
+ switch ((SWR_CONSTANT_SOURCE)backendState.swizzleMap[i].constantSource)
+ {
+ case SWR_CONSTANT_SOURCE_CONST_0000:
+ case SWR_CONSTANT_SOURCE_CONST_0001_FLOAT:
+ case SWR_CONSTANT_SOURCE_CONST_1111_FLOAT:
+ constantValue = constTable[backendState.swizzleMap[i].constantSource][comp];
+ break;
+ case SWR_CONSTANT_SOURCE_PRIM_ID:
+ constantValue = *(float*)&primId;
+ break;
+ }
+
+ // apply constant value to all 3 vertices
+ for (uint32_t v = 0; v < 3; ++v)
+ {
+ pAttribStart[comp + v * 4] = constantValue;
+ }
+ }
+ }
+ }
}
}
+
+typedef void(*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*);
+
+struct ProcessAttributesChooser
+{
+ typedef PFN_PROCESS_ATTRIBUTES FuncType;
+
+ template <typename... ArgsB>
+ static FuncType GetFunc()
+ {
+ return ProcessAttributes<ArgsB...>;
+ }
+};
+
+PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp)
+{
+ return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(NumVerts, IsSwizzled, HasConstantInterp);
+}
+
//////////////////////////////////////////////////////////////////////////
/// @brief Processes enabled user clip distances. Loads the active clip
/// distances from the PA, sets up barycentric equations, and
@@ -1742,6 +1809,10 @@ void BinTriangles(
const SWR_GS_STATE& gsState = state.gsState;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ // Select attribute processor
+ PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
+ state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
simdscalar vRecipW0 = _simd_set1_ps(1.0f);
simdscalar vRecipW1 = _simd_set1_ps(1.0f);
@@ -1951,8 +2022,7 @@ void BinTriangles(
// scan remaining valid triangles and bin each separately
while (_BitScanForward(&triIndex, triMask))
{
- uint32_t linkageCount = state.linkageCount;
- uint32_t linkageMask = state.linkageMask;
+ uint32_t linkageCount = state.backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
@@ -1972,7 +2042,7 @@ void BinTriangles(
float *pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
desc.pAttribs = pAttribs;
desc.numAttribs = linkageCount;
- ProcessAttributes<3>(pDC, pa, linkageMask, state.linkageMap, triIndex, desc.pAttribs);
+ pfnProcessAttribs(pDC, pa, triIndex, pPrimID[triIndex], desc.pAttribs);
// store triangle vertex data
desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
@@ -2050,6 +2120,10 @@ void BinPoints(
const SWR_GS_STATE& gsState = state.gsState;
const SWR_RASTSTATE& rastState = state.rastState;
+ // Select attribute processor
+ PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(1,
+ state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
if (!feState.vpTransformDisable)
{
// perspective divide
@@ -2130,12 +2204,13 @@ void BinPoints(
uint32_t *pPrimID = (uint32_t *)&primID;
DWORD primIndex = 0;
+
+ const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
+
// scan remaining valid triangles and bin each separately
while (_BitScanForward(&primIndex, primMask))
{
- uint32_t linkageCount = state.linkageCount;
- uint32_t linkageMask = state.linkageMask;
-
+ uint32_t linkageCount = backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
@@ -2158,7 +2233,7 @@ void BinPoints(
desc.pAttribs = pAttribs;
desc.numAttribs = linkageCount;
- ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, pAttribs);
+ pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], pAttribs);
// store raster tile aligned x, y, perspective correct z
float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
@@ -2265,11 +2340,11 @@ void BinPoints(
_simd_store_ps((float*)aPrimVertsZ, primVerts.z);
// scan remaining valid prims and bin each separately
+ const SWR_BACKEND_STATE& backendState = state.backendState;
DWORD primIndex;
while (_BitScanForward(&primIndex, primMask))
{
- uint32_t linkageCount = state.linkageCount;
- uint32_t linkageMask = state.linkageMask;
+ uint32_t linkageCount = backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
@@ -2290,7 +2365,7 @@ void BinPoints(
// store active attribs
desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
desc.numAttribs = linkageCount;
- ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs);
+ pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
// store point vertex data
float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
@@ -2353,6 +2428,10 @@ void BinLines(
const SWR_FRONTEND_STATE& feState = state.frontendState;
const SWR_GS_STATE& gsState = state.gsState;
+ // Select attribute processor
+ PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(2,
+ state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
simdscalar vRecipW0 = _simd_set1_ps(1.0f);
simdscalar vRecipW1 = _simd_set1_ps(1.0f);
@@ -2485,8 +2564,7 @@ void BinLines(
DWORD primIndex;
while (_BitScanForward(&primIndex, primMask))
{
- uint32_t linkageCount = state.linkageCount;
- uint32_t linkageMask = state.linkageMask;
+ uint32_t linkageCount = state.backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
@@ -2507,7 +2585,7 @@ void BinLines(
// store active attribs
desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
desc.numAttribs = linkageCount;
- ProcessAttributes<2>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs);
+ pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
// store line vertex data
desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
index 6aa73c1ddf1..64932af6145 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -1169,15 +1169,8 @@ struct PA_FACTORY
topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ)))
{
memset(&indexStore, 0, sizeof(indexStore));
- DWORD numAttribs;
- if (_BitScanReverse(&numAttribs, state.feAttribMask))
- {
- numAttribs++;
- }
- else
- {
- numAttribs = 0;
- }
+ uint32_t numAttribs = state.feNumAttributes;
+
new (&this->paCut) PA_STATE_CUT(pDC, (uint8_t*)&this->vertexStore[0], MAX_NUM_VERTS_PER_PRIM * KNOB_SIMD_WIDTH,
&this->indexStore[0], numVerts, numAttribs, state.topology, false);
cutPA = true;
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 9fc304a8c3f..0931c82b5d2 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -938,13 +938,34 @@ struct SWR_RASTSTATE
uint8_t clipDistanceMask;
};
+enum SWR_CONSTANT_SOURCE
+{
+ SWR_CONSTANT_SOURCE_CONST_0000,
+ SWR_CONSTANT_SOURCE_CONST_0001_FLOAT,
+ SWR_CONSTANT_SOURCE_CONST_1111_FLOAT,
+ SWR_CONSTANT_SOURCE_PRIM_ID
+};
+
+struct SWR_ATTRIB_SWIZZLE
+{
+ uint16_t sourceAttrib : 5; // source attribute
+ uint16_t constantSource : 2; // constant source to apply
+ uint16_t componentOverrideMask : 4; // override component with constant source
+};
+
// backend state
struct SWR_BACKEND_STATE
{
- uint32_t constantInterpolationMask;
- uint32_t pointSpriteTexCoordMask;
- uint8_t numAttributes;
- uint8_t numComponents[KNOB_NUM_ATTRIBUTES];
+ uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
+ uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
+
+ uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
+ uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
+
+ bool swizzleEnable; // when enabled, core will parse the swizzle map when
+ // setting up attributes for the backend, otherwise
+ // all attributes up to numAttributes will be sent
+ SWR_ATTRIB_SWIZZLE swizzleMap[32];
};
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
index 1d8e9a111ed..d3181cd29ec 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
@@ -80,12 +80,12 @@ enum ComponentEnable
enum ComponentControl
{
- NoStore = 0,
- StoreSrc = 1,
- Store0 = 2,
- Store1Fp = 3,
- Store1Int = 4,
- StoreVertexId = 5,
+ NoStore = 0,
+ StoreSrc = 1,
+ Store0 = 2,
+ Store1Fp = 3,
+ Store1Int = 4,
+ StoreVertexId = 5,
StoreInstanceId = 6
};
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index 4d1b604817b..ecb4545d13b 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -157,18 +157,6 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
{
struct swr_vertex_shader *swr_vs = ctx->vs;
- swr_vs->linkageMask = 0;
-
- for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) {
- switch (swr_vs->info.base.output_semantic_name[i]) {
- case TGSI_SEMANTIC_POSITION:
- break;
- default:
- swr_vs->linkageMask |= (1 << i);
- break;
- }
- }
-
LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 5caaa5c7139..dac95ce42e7 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1373,16 +1373,13 @@ swr_update_derived(struct pipe_context *pipe,
}
}
- uint32_t linkage = ctx->vs->linkageMask;
- if (ctx->rasterizer->sprite_coord_enable)
- linkage |= (1 << ctx->vs->info.base.num_outputs);
-
- SwrSetLinkage(ctx->swrContext, linkage, NULL);
-
// set up backend state
SWR_BACKEND_STATE backendState = {0};
- backendState.numAttributes = 1;
- backendState.numComponents[0] = 4;
+ backendState.numAttributes =
+ ctx->vs->info.base.num_outputs - 1 +
+ (ctx->rasterizer->sprite_coord_enable ? 1 : 0);
+ for (unsigned i = 0; i < backendState.numAttributes; i++)
+ backendState.numComponents[i] = 4;
backendState.constantInterpolationMask =
ctx->rasterizer->flatshade ?
ctx->fs->flatConstantMask :
diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h
index cb699641274..dcb1145a362 100644
--- a/src/gallium/drivers/swr/swr_state.h
+++ b/src/gallium/drivers/swr/swr_state.h
@@ -53,7 +53,6 @@ typedef ShaderVariant<PFN_PIXEL_KERNEL> VariantFS;
struct swr_vertex_shader {
struct pipe_shader_state pipe;
struct lp_tgsi_info info;
- unsigned linkageMask;
std::unordered_map<swr_jit_vs_key, std::unique_ptr<VariantVS>> map;
SWR_STREAMOUT_STATE soState;
PFN_SO_FUNC soFunc[PIPE_PRIM_MAX] {0};