diff options
author | Tim Rowley <[email protected]> | 2016-07-12 15:03:42 -0600 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2016-07-20 10:22:15 -0500 |
commit | efdaf5fa3e74ca4f3d9217dc6955aef6dc698a68 (patch) | |
tree | 19ed6e0186ecd93e1b52e07704cf58c8b87ae860 /src | |
parent | a5846fb75abae86dee29b2948b5d6462d84467c3 (diff) |
swr: [rasterizer] attribute swizzling and linkage
Add support for enhanced attribute swizzling. Currently supports constant
source overrides, which are used to handle PrimitiveID. Input select
swizzling and wrap shortest are not supported yet. Removes the obsolete
linkageMask and its associated code.
Signed-off-by: Tim Rowley <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.cpp | 56 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/clip.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/context.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 222 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/pa.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/state.h | 29 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_shader.cpp | 12 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_state.cpp | 13 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_state.h | 1 |
11 files changed, 218 insertions, 171 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index aface7a77ef..c3a1539b506 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -606,31 +606,6 @@ void SwrSetBlendFunc( pState->pfnBlendFunc[renderTarget] = pfnBlendFunc; } -void SwrSetLinkage( - HANDLE hContext, - uint32_t mask, - const uint8_t* pMap) -{ - API_STATE* pState = GetDrawState(GetContext(hContext)); - - static const uint8_t IDENTITY_MAP[] = - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - }; - static_assert(sizeof(IDENTITY_MAP) == sizeof(pState->linkageMap), - "Update for new value of MAX_ATTRIBUTES"); - - pState->linkageMask = mask; - pState->linkageCount = _mm_popcnt_u32(mask); - - if (!pMap) - { - pMap = IDENTITY_MAP; - } - memcpy(pState->linkageMap, pMap, pState->linkageCount); -} - // update guardband multipliers for the viewport void updateGuardband(API_STATE *pState) { @@ -847,25 +822,44 @@ void SetupPipeline(DRAW_CONTEXT *pDC) (pState->state.depthStencilState.depthWriteEnable == FALSE) && (pState->state.depthStencilState.stencilTestEnable == FALSE) && (pState->state.depthStencilState.stencilWriteEnable == FALSE) && - (pState->state.linkageCount == 0)) + (pState->state.backendState.numAttributes == 0)) { pState->pfnProcessPrims = nullptr; - pState->state.linkageMask = 0; } if (pState->state.soState.rasterizerDisable == true) { pState->pfnProcessPrims = nullptr; - pState->state.linkageMask = 0; } - // set up the frontend attrib mask - pState->state.feAttribMask = pState->state.linkageMask; + // set up the frontend attribute count + pState->state.feNumAttributes = 0; + const SWR_BACKEND_STATE& backendState = pState->state.backendState; + if (backendState.swizzleEnable) + { + // attribute swizzling is enabled, iterate over the map and record the max attribute used + for (uint32_t i = 0; i < 
backendState.numAttributes; ++i) + { + pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1); + } + } + else + { + pState->state.feNumAttributes = pState->state.backendState.numAttributes; + } + if (pState->state.soState.soEnable) { + uint32_t streamMasks = 0; for (uint32_t i = 0; i < 4; ++i) { - pState->state.feAttribMask |= pState->state.soState.streamMasks[i]; + streamMasks |= pState->state.soState.streamMasks[i]; + } + + DWORD maxAttrib; + if (_BitScanReverse(&maxAttrib, streamMasks)) + { + pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1)); } } diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 04cdb9e4e65..ab56cab772e 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -330,19 +330,6 @@ void SWR_API SwrSetBlendFunc( PFN_BLEND_JIT_FUNC pfnBlendFunc); ////////////////////////////////////////////////////////////////////////// -/// @brief Set linkage mask -/// @param hContext - Handle passed back from SwrCreateContext -/// @param mask - Specifies which vertex outputs are are needed by PS. -/// @param pMap - (Optional)Linkage map to specify where FE attributes are -/// gathered from to supply PS attribute values. The length -/// of the map buffer needs to match the number of set bits -/// in "mask". -void SWR_API SwrSetLinkage( - HANDLE hContext, - uint32_t mask, - const uint8_t* pMap); - -////////////////////////////////////////////////////////////////////////// /// @brief SwrDraw /// @param hContext - Handle passed back from SwrCreateContext /// @param topology - Specifies topology for draw. 
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 1a6fc6d2873..b2b3bb4e6fd 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -324,16 +324,13 @@ public: } // assemble attribs - DWORD slot = 0; - uint32_t mapIdx = 0; - uint32_t tmpLinkage = this->state.linkageMask; + const SWR_BACKEND_STATE& backendState = this->state.backendState; int32_t maxSlot = -1; - while (_BitScanForward(&slot, tmpLinkage)) + for (uint32_t slot = 0; slot < backendState.numAttributes; ++slot) { - tmpLinkage &= ~(1 << slot); // Compute absolute attrib slot in vertex array - uint32_t mapSlot = this->state.linkageMap[mapIdx++]; + uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot; maxSlot = std::max<int32_t>(maxSlot, mapSlot); uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot; diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index be4c2e94b42..13dcdfca2ee 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -264,15 +264,8 @@ OSALIGNLINE(struct) API_STATE PFN_DS_FUNC pfnDsFunc; SWR_TS_STATE tsState; - // Specifies which VS outputs are sent to PS. 
- // Does not include position - uint32_t linkageMask; - uint32_t linkageCount; - uint8_t linkageMap[MAX_ATTRIBUTES]; - - // attrib mask, specifies the total set of attributes used - // by the frontend (vs, so, gs) - uint32_t feAttribMask; + // Number of attributes used by the frontend (vs, so, gs) + uint32_t feNumAttributes; PRIMITIVE_TOPOLOGY topology; bool forceFront; diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index cc8ebda35bc..8537c59033c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -794,15 +794,7 @@ static void GeometryShaderStage( uint8_t* pBase = pInstanceBase + instance * instanceStride; uint8_t* pCutBase = pCutBufferBase + instance * cutInstanceStride; - DWORD numAttribs; - if (_BitScanReverse(&numAttribs, state.feAttribMask)) - { - numAttribs++; - } - else - { - numAttribs = 0; - } + uint32_t numAttribs = state.feNumAttributes; for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream) { @@ -1445,7 +1437,6 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc( return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization); } - ////////////////////////////////////////////////////////////////////////// /// @brief Processes attributes for the backend based on linkage mask and /// linkage map. Essentially just doing an SOA->AOS conversion and pack. 
@@ -1455,75 +1446,101 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc( /// @param pLinkageMap - maps VS attribute slot to PS slot /// @param triIndex - Triangle to process attributes for /// @param pBuffer - Output result -template<uint32_t NumVerts> +template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT> INLINE void ProcessAttributes( DRAW_CONTEXT *pDC, PA_STATE&pa, - uint32_t linkageMask, - const uint8_t* pLinkageMap, uint32_t triIndex, + uint32_t primId, float *pBuffer) { - DWORD slot = 0; - uint32_t mapIdx = 0; - LONG constantInterpMask = pDC->pState->state.backendState.constantInterpolationMask; + static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT"); + const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState; + LONG constantInterpMask = backendState.constantInterpolationMask; const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex; const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology; - while (_BitScanForward(&slot, linkageMask)) + static const float constTable[3][4] = { + {0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 0.0f, 1.0f}, + {1.0f, 1.0f, 1.0f, 1.0f} + }; + + for (uint32_t i = 0; i < backendState.numAttributes; ++i) { - linkageMask &= ~(1 << slot); // done with this bit. + uint32_t inputSlot; + if (IsSwizzledT::value) + { + SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i]; + inputSlot = VERTEX_ATTRIB_START_SLOT + attribSwizzle.sourceAttrib; - // compute absolute slot in vertex attrib array - uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + pLinkageMap[mapIdx]; + } + else + { + inputSlot = VERTEX_ATTRIB_START_SLOT + i; + } __m128 attrib[3]; // triangle attribs (always 4 wide) + static const uint32_t numVerts = NumVertsT::value < 3 ? 
NumVertsT::value : 3; + float* pAttribStart = pBuffer; - if (_bittest(&constantInterpMask, mapIdx)) + if (HasConstantInterpT::value) { - uint32_t vid; - static const uint32_t tristripProvokingVertex[] = {0, 2, 1}; - static const int32_t quadProvokingTri[2][4] = {{0, 0, 0, 1}, {0, -1, 0, 0}}; - static const uint32_t quadProvokingVertex[2][4] = {{0, 1, 2, 2}, {0, 1, 1, 2}}; - static const int32_t qstripProvokingTri[2][4] = {{0, 0, 0, 1}, {-1, 0, 0, 0}}; - static const uint32_t qstripProvokingVertex[2][4] = {{0, 1, 2, 1}, {0, 0, 2, 1}}; - - switch (topo) { - case TOP_QUAD_LIST: - pa.AssembleSingle(inputSlot, - triIndex + quadProvokingTri[triIndex & 1][provokingVertex], - attrib); - vid = quadProvokingVertex[triIndex & 1][provokingVertex]; - break; - case TOP_QUAD_STRIP: - pa.AssembleSingle(inputSlot, - triIndex + qstripProvokingTri[triIndex & 1][provokingVertex], - attrib); - vid = qstripProvokingVertex[triIndex & 1][provokingVertex]; - break; - case TOP_TRIANGLE_STRIP: - pa.AssembleSingle(inputSlot, triIndex, attrib); - vid = (triIndex & 1) - ? 
tristripProvokingVertex[provokingVertex] - : provokingVertex; - break; - default: - pa.AssembleSingle(inputSlot, triIndex, attrib); - vid = provokingVertex; - break; - } + if (_bittest(&constantInterpMask, i)) + { + uint32_t vid; + uint32_t adjustedTriIndex; + static const uint32_t tristripProvokingVertex[] = { 0, 2, 1 }; + static const int32_t quadProvokingTri[2][4] = { {0, 0, 0, 1}, {0, -1, 0, 0} }; + static const uint32_t quadProvokingVertex[2][4] = { {0, 1, 2, 2}, {0, 1, 1, 2} }; + static const int32_t qstripProvokingTri[2][4] = { {0, 0, 0, 1}, {-1, 0, 0, 0} }; + static const uint32_t qstripProvokingVertex[2][4] = { {0, 1, 2, 1}, {0, 0, 2, 1} }; + + switch (topo) { + case TOP_QUAD_LIST: + adjustedTriIndex = triIndex + quadProvokingTri[triIndex & 1][provokingVertex]; + vid = quadProvokingVertex[triIndex & 1][provokingVertex]; + break; + case TOP_QUAD_STRIP: + adjustedTriIndex = triIndex + qstripProvokingTri[triIndex & 1][provokingVertex]; + vid = qstripProvokingVertex[triIndex & 1][provokingVertex]; + break; + case TOP_TRIANGLE_STRIP: + adjustedTriIndex = triIndex; + vid = (triIndex & 1) + ? 
tristripProvokingVertex[provokingVertex] + : provokingVertex; + break; + default: + adjustedTriIndex = triIndex; + vid = provokingVertex; + break; + } + + pa.AssembleSingle(inputSlot, adjustedTriIndex, attrib); - for (uint32_t i = 0; i < NumVerts; ++i) + for (uint32_t i = 0; i < numVerts; ++i) + { + _mm_store_ps(pBuffer, attrib[vid]); + pBuffer += 4; + } + } + else { - _mm_store_ps(pBuffer, attrib[vid]); - pBuffer += 4; + pa.AssembleSingle(inputSlot, triIndex, attrib); + + for (uint32_t i = 0; i < numVerts; ++i) + { + _mm_store_ps(pBuffer, attrib[i]); + pBuffer += 4; + } } } else { pa.AssembleSingle(inputSlot, triIndex, attrib); - for (uint32_t i = 0; i < NumVerts; ++i) + for (uint32_t i = 0; i < numVerts; ++i) { _mm_store_ps(pBuffer, attrib[i]); pBuffer += 4; @@ -1534,16 +1551,66 @@ INLINE void ProcessAttributes( // interpolation code in the pixel shader works correctly for the // 3 topologies - point, line, tri. This effectively zeros out the // effect of the missing vertices in the triangle interpolation. 
- for (uint32_t i = NumVerts; i < 3; ++i) + for (uint32_t v = numVerts; v < 3; ++v) { - _mm_store_ps(pBuffer, attrib[NumVerts - 1]); + _mm_store_ps(pBuffer, attrib[numVerts - 1]); pBuffer += 4; } - mapIdx++; + // check for constant source overrides + if (IsSwizzledT::value) + { + uint32_t mask = backendState.swizzleMap[i].componentOverrideMask; + if (mask) + { + DWORD comp; + while (_BitScanForward(&comp, mask)) + { + mask &= ~(1 << comp); + + float constantValue = 0.0f; + switch ((SWR_CONSTANT_SOURCE)backendState.swizzleMap[i].constantSource) + { + case SWR_CONSTANT_SOURCE_CONST_0000: + case SWR_CONSTANT_SOURCE_CONST_0001_FLOAT: + case SWR_CONSTANT_SOURCE_CONST_1111_FLOAT: + constantValue = constTable[backendState.swizzleMap[i].constantSource][comp]; + break; + case SWR_CONSTANT_SOURCE_PRIM_ID: + constantValue = *(float*)&primId; + break; + } + + // apply constant value to all 3 vertices + for (uint32_t v = 0; v < 3; ++v) + { + pAttribStart[comp + v * 4] = constantValue; + } + } + } + } } } + +typedef void(*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*); + +struct ProcessAttributesChooser +{ + typedef PFN_PROCESS_ATTRIBUTES FuncType; + + template <typename... ArgsB> + static FuncType GetFunc() + { + return ProcessAttributes<ArgsB...>; + } +}; + +PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp) +{ + return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(NumVerts, IsSwizzled, HasConstantInterp); +} + ////////////////////////////////////////////////////////////////////////// /// @brief Processes enabled user clip distances. 
Loads the active clip /// distances from the PA, sets up barycentric equations, and @@ -1742,6 +1809,10 @@ void BinTriangles( const SWR_GS_STATE& gsState = state.gsState; MacroTileMgr *pTileMgr = pDC->pTileMgr; + // Select attribute processor + PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3, + state.backendState.swizzleEnable, state.backendState.constantInterpolationMask); + simdscalar vRecipW0 = _simd_set1_ps(1.0f); simdscalar vRecipW1 = _simd_set1_ps(1.0f); @@ -1951,8 +2022,7 @@ void BinTriangles( // scan remaining valid triangles and bin each separately while (_BitScanForward(&triIndex, triMask)) { - uint32_t linkageCount = state.linkageCount; - uint32_t linkageMask = state.linkageMask; + uint32_t linkageCount = state.backendState.numAttributes; uint32_t numScalarAttribs = linkageCount * 4; BE_WORK work; @@ -1972,7 +2042,7 @@ void BinTriangles( float *pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16); desc.pAttribs = pAttribs; desc.numAttribs = linkageCount; - ProcessAttributes<3>(pDC, pa, linkageMask, state.linkageMap, triIndex, desc.pAttribs); + pfnProcessAttribs(pDC, pa, triIndex, pPrimID[triIndex], desc.pAttribs); // store triangle vertex data desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16); @@ -2050,6 +2120,10 @@ void BinPoints( const SWR_GS_STATE& gsState = state.gsState; const SWR_RASTSTATE& rastState = state.rastState; + // Select attribute processor + PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(1, + state.backendState.swizzleEnable, state.backendState.constantInterpolationMask); + if (!feState.vpTransformDisable) { // perspective divide @@ -2130,12 +2204,13 @@ void BinPoints( uint32_t *pPrimID = (uint32_t *)&primID; DWORD primIndex = 0; + + const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState; + // scan remaining valid triangles and bin each separately while (_BitScanForward(&primIndex, primMask)) { - uint32_t linkageCount = 
state.linkageCount; - uint32_t linkageMask = state.linkageMask; - + uint32_t linkageCount = backendState.numAttributes; uint32_t numScalarAttribs = linkageCount * 4; BE_WORK work; @@ -2158,7 +2233,7 @@ void BinPoints( desc.pAttribs = pAttribs; desc.numAttribs = linkageCount; - ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, pAttribs); + pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], pAttribs); // store raster tile aligned x, y, perspective correct z float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16); @@ -2265,11 +2340,11 @@ void BinPoints( _simd_store_ps((float*)aPrimVertsZ, primVerts.z); // scan remaining valid prims and bin each separately + const SWR_BACKEND_STATE& backendState = state.backendState; DWORD primIndex; while (_BitScanForward(&primIndex, primMask)) { - uint32_t linkageCount = state.linkageCount; - uint32_t linkageMask = state.linkageMask; + uint32_t linkageCount = backendState.numAttributes; uint32_t numScalarAttribs = linkageCount * 4; BE_WORK work; @@ -2290,7 +2365,7 @@ void BinPoints( // store active attribs desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16); desc.numAttribs = linkageCount; - ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs); + pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs); // store point vertex data float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16); @@ -2353,6 +2428,10 @@ void BinLines( const SWR_FRONTEND_STATE& feState = state.frontendState; const SWR_GS_STATE& gsState = state.gsState; + // Select attribute processor + PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(2, + state.backendState.swizzleEnable, state.backendState.constantInterpolationMask); + simdscalar vRecipW0 = _simd_set1_ps(1.0f); simdscalar vRecipW1 = _simd_set1_ps(1.0f); @@ -2485,8 +2564,7 @@ void BinLines( DWORD primIndex; while (_BitScanForward(&primIndex, 
primMask)) { - uint32_t linkageCount = state.linkageCount; - uint32_t linkageMask = state.linkageMask; + uint32_t linkageCount = state.backendState.numAttributes; uint32_t numScalarAttribs = linkageCount * 4; BE_WORK work; @@ -2507,7 +2585,7 @@ void BinLines( // store active attribs desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16); desc.numAttribs = linkageCount; - ProcessAttributes<2>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs); + pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs); // store line vertex data desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16); diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h index 6aa73c1ddf1..64932af6145 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa.h +++ b/src/gallium/drivers/swr/rasterizer/core/pa.h @@ -1169,15 +1169,8 @@ struct PA_FACTORY topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ))) { memset(&indexStore, 0, sizeof(indexStore)); - DWORD numAttribs; - if (_BitScanReverse(&numAttribs, state.feAttribMask)) - { - numAttribs++; - } - else - { - numAttribs = 0; - } + uint32_t numAttribs = state.feNumAttributes; + new (&this->paCut) PA_STATE_CUT(pDC, (uint8_t*)&this->vertexStore[0], MAX_NUM_VERTS_PER_PRIM * KNOB_SIMD_WIDTH, &this->indexStore[0], numVerts, numAttribs, state.topology, false); cutPA = true; diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 9fc304a8c3f..0931c82b5d2 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -938,13 +938,34 @@ struct SWR_RASTSTATE uint8_t clipDistanceMask; }; +enum SWR_CONSTANT_SOURCE +{ + SWR_CONSTANT_SOURCE_CONST_0000, + SWR_CONSTANT_SOURCE_CONST_0001_FLOAT, + SWR_CONSTANT_SOURCE_CONST_1111_FLOAT, + SWR_CONSTANT_SOURCE_PRIM_ID +}; 
+ +struct SWR_ATTRIB_SWIZZLE +{ + uint16_t sourceAttrib : 5; // source attribute + uint16_t constantSource : 2; // constant source to apply + uint16_t componentOverrideMask : 4; // override component with constant source +}; + // backend state struct SWR_BACKEND_STATE { - uint32_t constantInterpolationMask; - uint32_t pointSpriteTexCoordMask; - uint8_t numAttributes; - uint8_t numComponents[KNOB_NUM_ATTRIBUTES]; + uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation + uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates + + uint8_t numAttributes; // total number of attributes to send to backend (up to 32) + uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components + + bool swizzleEnable; // when enabled, core will parse the swizzle map when + // setting up attributes for the backend, otherwise + // all attributes up to numAttributes will be sent + SWR_ATTRIB_SWIZZLE swizzleMap[32]; }; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h index 1d8e9a111ed..d3181cd29ec 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h @@ -80,12 +80,12 @@ enum ComponentEnable enum ComponentControl { - NoStore = 0, - StoreSrc = 1, - Store0 = 2, - Store1Fp = 3, - Store1Int = 4, - StoreVertexId = 5, + NoStore = 0, + StoreSrc = 1, + Store0 = 2, + Store1Fp = 3, + Store1Int = 4, + StoreVertexId = 5, StoreInstanceId = 6 }; diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index 4d1b604817b..ecb4545d13b 100644 --- a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -157,18 +157,6 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) { struct swr_vertex_shader *swr_vs 
= ctx->vs; - swr_vs->linkageMask = 0; - - for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) { - switch (swr_vs->info.base.output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - break; - default: - swr_vs->linkageMask |= (1 << i); - break; - } - } - LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 5caaa5c7139..dac95ce42e7 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -1373,16 +1373,13 @@ swr_update_derived(struct pipe_context *pipe, } } - uint32_t linkage = ctx->vs->linkageMask; - if (ctx->rasterizer->sprite_coord_enable) - linkage |= (1 << ctx->vs->info.base.num_outputs); - - SwrSetLinkage(ctx->swrContext, linkage, NULL); - // set up backend state SWR_BACKEND_STATE backendState = {0}; - backendState.numAttributes = 1; - backendState.numComponents[0] = 4; + backendState.numAttributes = + ctx->vs->info.base.num_outputs - 1 + + (ctx->rasterizer->sprite_coord_enable ? 1 : 0); + for (unsigned i = 0; i < backendState.numAttributes; i++) + backendState.numComponents[i] = 4; backendState.constantInterpolationMask = ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h index cb699641274..dcb1145a362 100644 --- a/src/gallium/drivers/swr/swr_state.h +++ b/src/gallium/drivers/swr/swr_state.h @@ -53,7 +53,6 @@ typedef ShaderVariant<PFN_PIXEL_KERNEL> VariantFS; struct swr_vertex_shader { struct pipe_shader_state pipe; struct lp_tgsi_info info; - unsigned linkageMask; std::unordered_map<swr_jit_vs_key, std::unique_ptr<VariantVS>> map; SWR_STREAMOUT_STATE soState; PFN_SO_FUNC soFunc[PIPE_PRIM_MAX] {0}; |