diff options
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.cpp | 56 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/clip.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/context.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 222 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/pa.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/state.h | 29 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_shader.cpp | 12 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_state.cpp | 13 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_state.h | 1 |
11 files changed, 218 insertions, 171 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index aface7a77ef..c3a1539b506 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -606,31 +606,6 @@ void SwrSetBlendFunc( pState->pfnBlendFunc[renderTarget] = pfnBlendFunc; } -void SwrSetLinkage( - HANDLE hContext, - uint32_t mask, - const uint8_t* pMap) -{ - API_STATE* pState = GetDrawState(GetContext(hContext)); - - static const uint8_t IDENTITY_MAP[] = - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - }; - static_assert(sizeof(IDENTITY_MAP) == sizeof(pState->linkageMap), - "Update for new value of MAX_ATTRIBUTES"); - - pState->linkageMask = mask; - pState->linkageCount = _mm_popcnt_u32(mask); - - if (!pMap) - { - pMap = IDENTITY_MAP; - } - memcpy(pState->linkageMap, pMap, pState->linkageCount); -} - // update guardband multipliers for the viewport void updateGuardband(API_STATE *pState) { @@ -847,25 +822,44 @@ void SetupPipeline(DRAW_CONTEXT *pDC) (pState->state.depthStencilState.depthWriteEnable == FALSE) && (pState->state.depthStencilState.stencilTestEnable == FALSE) && (pState->state.depthStencilState.stencilWriteEnable == FALSE) && - (pState->state.linkageCount == 0)) + (pState->state.backendState.numAttributes == 0)) { pState->pfnProcessPrims = nullptr; - pState->state.linkageMask = 0; } if (pState->state.soState.rasterizerDisable == true) { pState->pfnProcessPrims = nullptr; - pState->state.linkageMask = 0; } - // set up the frontend attrib mask - pState->state.feAttribMask = pState->state.linkageMask; + // set up the frontend attribute count + pState->state.feNumAttributes = 0; + const SWR_BACKEND_STATE& backendState = pState->state.backendState; + if (backendState.swizzleEnable) + { + // attribute swizzling is enabled, iterate over the map and record the max attribute used + for (uint32_t i = 0; i < backendState.numAttributes; ++i) + { + pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1); + } + } + else + { + pState->state.feNumAttributes = pState->state.backendState.numAttributes; + } + if (pState->state.soState.soEnable) { + uint32_t streamMasks = 0; for (uint32_t i = 0; i < 4; ++i) { - pState->state.feAttribMask |= pState->state.soState.streamMasks[i]; + streamMasks |= pState->state.soState.streamMasks[i]; + } + + DWORD maxAttrib; + if (_BitScanReverse(&maxAttrib, streamMasks)) + { + pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1)); } } diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 04cdb9e4e65..ab56cab772e 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -330,19 +330,6 @@ void SWR_API SwrSetBlendFunc( PFN_BLEND_JIT_FUNC pfnBlendFunc); ////////////////////////////////////////////////////////////////////////// -/// @brief Set linkage mask -/// @param hContext - Handle passed back from SwrCreateContext -/// @param mask - Specifies which vertex outputs are are needed by PS. -/// @param pMap - (Optional)Linkage map to specify where FE attributes are -/// gathered from to supply PS attribute values. The length -/// of the map buffer needs to match the number of set bits -/// in "mask". -void SWR_API SwrSetLinkage( - HANDLE hContext, - uint32_t mask, - const uint8_t* pMap); - -////////////////////////////////////////////////////////////////////////// /// @brief SwrDraw /// @param hContext - Handle passed back from SwrCreateContext /// @param topology - Specifies topology for draw. diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 1a6fc6d2873..b2b3bb4e6fd 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -324,16 +324,13 @@ public: } // assemble attribs - DWORD slot = 0; - uint32_t mapIdx = 0; - uint32_t tmpLinkage = this->state.linkageMask; + const SWR_BACKEND_STATE& backendState = this->state.backendState; int32_t maxSlot = -1; - while (_BitScanForward(&slot, tmpLinkage)) + for (uint32_t slot = 0; slot < backendState.numAttributes; ++slot) { - tmpLinkage &= ~(1 << slot); // Compute absolute attrib slot in vertex array - uint32_t mapSlot = this->state.linkageMap[mapIdx++]; + uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot; maxSlot = std::max<int32_t>(maxSlot, mapSlot); uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot; diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index be4c2e94b42..13dcdfca2ee 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -264,15 +264,8 @@ OSALIGNLINE(struct) API_STATE PFN_DS_FUNC pfnDsFunc; SWR_TS_STATE tsState; - // Specifies which VS outputs are sent to PS. - // Does not include position - uint32_t linkageMask; - uint32_t linkageCount; - uint8_t linkageMap[MAX_ATTRIBUTES]; - - // attrib mask, specifies the total set of attributes used - // by the frontend (vs, so, gs) - uint32_t feAttribMask; + // Number of attributes used by the frontend (vs, so, gs) + uint32_t feNumAttributes; PRIMITIVE_TOPOLOGY topology; bool forceFront; diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index cc8ebda35bc..8537c59033c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -794,15 +794,7 @@ static void GeometryShaderStage( uint8_t* pBase = pInstanceBase + instance * instanceStride; uint8_t* pCutBase = pCutBufferBase + instance * cutInstanceStride; - DWORD numAttribs; - if (_BitScanReverse(&numAttribs, state.feAttribMask)) - { - numAttribs++; - } - else - { - numAttribs = 0; - } + uint32_t numAttribs = state.feNumAttributes; for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream) { @@ -1445,7 +1437,6 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc( return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization); } - ////////////////////////////////////////////////////////////////////////// /// @brief Processes attributes for the backend based on linkage mask and /// linkage map. Essentially just doing an SOA->AOS conversion and pack. @@ -1455,75 +1446,101 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc( /// @param pLinkageMap - maps VS attribute slot to PS slot /// @param triIndex - Triangle to process attributes for /// @param pBuffer - Output result -template<uint32_t NumVerts> +template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT> INLINE void ProcessAttributes( DRAW_CONTEXT *pDC, PA_STATE&pa, - uint32_t linkageMask, - const uint8_t* pLinkageMap, uint32_t triIndex, + uint32_t primId, float *pBuffer) { - DWORD slot = 0; - uint32_t mapIdx = 0; - LONG constantInterpMask = pDC->pState->state.backendState.constantInterpolationMask; + static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT"); + const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState; + LONG constantInterpMask = backendState.constantInterpolationMask; const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex; const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology; - while (_BitScanForward(&slot, linkageMask)) + static const float constTable[3][4] = { + {0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 0.0f, 1.0f}, + {1.0f, 1.0f, 1.0f, 1.0f} + }; + + for (uint32_t i = 0; i < backendState.numAttributes; ++i) { - linkageMask &= ~(1 << slot); // done with this bit. + uint32_t inputSlot; + if (IsSwizzledT::value) + { + SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i]; + inputSlot = VERTEX_ATTRIB_START_SLOT + attribSwizzle.sourceAttrib; - // compute absolute slot in vertex attrib array - uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + pLinkageMap[mapIdx]; + } + else + { + inputSlot = VERTEX_ATTRIB_START_SLOT + i; + } __m128 attrib[3]; // triangle attribs (always 4 wide) + static const uint32_t numVerts = NumVertsT::value < 3 ? NumVertsT::value : 3; + float* pAttribStart = pBuffer; - if (_bittest(&constantInterpMask, mapIdx)) + if (HasConstantInterpT::value) { - uint32_t vid; - static const uint32_t tristripProvokingVertex[] = {0, 2, 1}; - static const int32_t quadProvokingTri[2][4] = {{0, 0, 0, 1}, {0, -1, 0, 0}}; - static const uint32_t quadProvokingVertex[2][4] = {{0, 1, 2, 2}, {0, 1, 1, 2}}; - static const int32_t qstripProvokingTri[2][4] = {{0, 0, 0, 1}, {-1, 0, 0, 0}}; - static const uint32_t qstripProvokingVertex[2][4] = {{0, 1, 2, 1}, {0, 0, 2, 1}}; - - switch (topo) { - case TOP_QUAD_LIST: - pa.AssembleSingle(inputSlot, - triIndex + quadProvokingTri[triIndex & 1][provokingVertex], - attrib); - vid = quadProvokingVertex[triIndex & 1][provokingVertex]; - break; - case TOP_QUAD_STRIP: - pa.AssembleSingle(inputSlot, - triIndex + qstripProvokingTri[triIndex & 1][provokingVertex], - attrib); - vid = qstripProvokingVertex[triIndex & 1][provokingVertex]; - break; - case TOP_TRIANGLE_STRIP: - pa.AssembleSingle(inputSlot, triIndex, attrib); - vid = (triIndex & 1) - ? tristripProvokingVertex[provokingVertex] - : provokingVertex; - break; - default: - pa.AssembleSingle(inputSlot, triIndex, attrib); - vid = provokingVertex; - break; - } + if (_bittest(&constantInterpMask, i)) + { + uint32_t vid; + uint32_t adjustedTriIndex; + static const uint32_t tristripProvokingVertex[] = { 0, 2, 1 }; + static const int32_t quadProvokingTri[2][4] = { {0, 0, 0, 1}, {0, -1, 0, 0} }; + static const uint32_t quadProvokingVertex[2][4] = { {0, 1, 2, 2}, {0, 1, 1, 2} }; + static const int32_t qstripProvokingTri[2][4] = { {0, 0, 0, 1}, {-1, 0, 0, 0} }; + static const uint32_t qstripProvokingVertex[2][4] = { {0, 1, 2, 1}, {0, 0, 2, 1} }; + + switch (topo) { + case TOP_QUAD_LIST: + adjustedTriIndex = triIndex + quadProvokingTri[triIndex & 1][provokingVertex]; + vid = quadProvokingVertex[triIndex & 1][provokingVertex]; + break; + case TOP_QUAD_STRIP: + adjustedTriIndex = triIndex + qstripProvokingTri[triIndex & 1][provokingVertex]; + vid = qstripProvokingVertex[triIndex & 1][provokingVertex]; + break; + case TOP_TRIANGLE_STRIP: + adjustedTriIndex = triIndex; + vid = (triIndex & 1) + ? tristripProvokingVertex[provokingVertex] + : provokingVertex; + break; + default: + adjustedTriIndex = triIndex; + vid = provokingVertex; + break; + } + + pa.AssembleSingle(inputSlot, adjustedTriIndex, attrib); - for (uint32_t i = 0; i < NumVerts; ++i) + for (uint32_t i = 0; i < numVerts; ++i) + { + _mm_store_ps(pBuffer, attrib[vid]); + pBuffer += 4; + } + } + else { - _mm_store_ps(pBuffer, attrib[vid]); - pBuffer += 4; + pa.AssembleSingle(inputSlot, triIndex, attrib); + + for (uint32_t i = 0; i < numVerts; ++i) + { + _mm_store_ps(pBuffer, attrib[i]); + pBuffer += 4; + } } } else { pa.AssembleSingle(inputSlot, triIndex, attrib); - for (uint32_t i = 0; i < NumVerts; ++i) + for (uint32_t i = 0; i < numVerts; ++i) { _mm_store_ps(pBuffer, attrib[i]); pBuffer += 4; @@ -1534,16 +1551,66 @@ INLINE void ProcessAttributes( // interpolation code in the pixel shader works correctly for the // 3 topologies - point, line, tri. This effectively zeros out the // effect of the missing vertices in the triangle interpolation. - for (uint32_t i = NumVerts; i < 3; ++i) + for (uint32_t v = numVerts; v < 3; ++v) { - _mm_store_ps(pBuffer, attrib[NumVerts - 1]); + _mm_store_ps(pBuffer, attrib[numVerts - 1]); pBuffer += 4; } - mapIdx++; + // check for constant source overrides + if (IsSwizzledT::value) + { + uint32_t mask = backendState.swizzleMap[i].componentOverrideMask; + if (mask) + { + DWORD comp; + while (_BitScanForward(&comp, mask)) + { + mask &= ~(1 << comp); + + float constantValue = 0.0f; + switch ((SWR_CONSTANT_SOURCE)backendState.swizzleMap[i].constantSource) + { + case SWR_CONSTANT_SOURCE_CONST_0000: + case SWR_CONSTANT_SOURCE_CONST_0001_FLOAT: + case SWR_CONSTANT_SOURCE_CONST_1111_FLOAT: + constantValue = constTable[backendState.swizzleMap[i].constantSource][comp]; + break; + case SWR_CONSTANT_SOURCE_PRIM_ID: + constantValue = *(float*)&primId; + break; + } + + // apply constant value to all 3 vertices + for (uint32_t v = 0; v < 3; ++v) + { + pAttribStart[comp + v * 4] = constantValue; + } + } + } + } } } + +typedef void(*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*); + +struct ProcessAttributesChooser +{ + typedef PFN_PROCESS_ATTRIBUTES FuncType; + + template <typename... ArgsB> + static FuncType GetFunc() + { + return ProcessAttributes<ArgsB...>; + } +}; + +PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp) +{ + return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(NumVerts, IsSwizzled, HasConstantInterp); +} + ////////////////////////////////////////////////////////////////////////// /// @brief Processes enabled user clip distances. Loads the active clip /// distances from the PA, sets up barycentric equations, and @@ -1742,6 +1809,10 @@ void BinTriangles( const SWR_GS_STATE& gsState = state.gsState; MacroTileMgr *pTileMgr = pDC->pTileMgr; + // Select attribute processor + PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3, + state.backendState.swizzleEnable, state.backendState.constantInterpolationMask); + simdscalar vRecipW0 = _simd_set1_ps(1.0f); simdscalar vRecipW1 = _simd_set1_ps(1.0f); @@ -1951,8 +2022,7 @@ void BinTriangles( // scan remaining valid triangles and bin each separately while (_BitScanForward(&triIndex, triMask)) { - uint32_t linkageCount = state.linkageCount; - uint32_t linkageMask = state.linkageMask; + uint32_t linkageCount = state.backendState.numAttributes; uint32_t numScalarAttribs = linkageCount * 4; BE_WORK work; @@ -1972,7 +2042,7 @@ void BinTriangles( float *pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16); desc.pAttribs = pAttribs; desc.numAttribs = linkageCount; - ProcessAttributes<3>(pDC, pa, linkageMask, state.linkageMap, triIndex, desc.pAttribs); + pfnProcessAttribs(pDC, pa, triIndex, pPrimID[triIndex], desc.pAttribs); // store triangle vertex data desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16); @@ -2050,6 +2120,10 @@ void BinPoints( const SWR_GS_STATE& gsState = state.gsState; const SWR_RASTSTATE& rastState = state.rastState; + // Select attribute processor + PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(1, + state.backendState.swizzleEnable, state.backendState.constantInterpolationMask); + if (!feState.vpTransformDisable) { // perspective divide @@ -2130,12 +2204,13 @@ void BinPoints( uint32_t *pPrimID = (uint32_t *)&primID; DWORD primIndex = 0; + + const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState; + // scan remaining valid triangles and bin each separately while (_BitScanForward(&primIndex, primMask)) { - uint32_t linkageCount = state.linkageCount; - uint32_t linkageMask = state.linkageMask; - + uint32_t linkageCount = backendState.numAttributes; uint32_t numScalarAttribs = linkageCount * 4; BE_WORK work; @@ -2158,7 +2233,7 @@ void BinPoints( desc.pAttribs = pAttribs; desc.numAttribs = linkageCount; - ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, pAttribs); + pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], pAttribs); // store raster tile aligned x, y, perspective correct z float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16); @@ -2265,11 +2340,11 @@ void BinPoints( _simd_store_ps((float*)aPrimVertsZ, primVerts.z); // scan remaining valid prims and bin each separately + const SWR_BACKEND_STATE& backendState = state.backendState; DWORD primIndex; while (_BitScanForward(&primIndex, primMask)) { - uint32_t linkageCount = state.linkageCount; - uint32_t linkageMask = state.linkageMask; + uint32_t linkageCount = backendState.numAttributes; uint32_t numScalarAttribs = linkageCount * 4; BE_WORK work; @@ -2290,7 +2365,7 @@ void BinPoints( // store active attribs desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16); desc.numAttribs = linkageCount; - ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs); + pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs); // store point vertex data float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16); @@ -2353,6 +2428,10 @@ void BinLines( const SWR_FRONTEND_STATE& feState = state.frontendState; const SWR_GS_STATE& gsState = state.gsState; + // Select attribute processor + PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(2, + state.backendState.swizzleEnable, state.backendState.constantInterpolationMask); + simdscalar vRecipW0 = _simd_set1_ps(1.0f); simdscalar vRecipW1 = _simd_set1_ps(1.0f); @@ -2485,8 +2564,7 @@ void BinLines( DWORD primIndex; while (_BitScanForward(&primIndex, primMask)) { - uint32_t linkageCount = state.linkageCount; - uint32_t linkageMask = state.linkageMask; + uint32_t linkageCount = state.backendState.numAttributes; uint32_t numScalarAttribs = linkageCount * 4; BE_WORK work; @@ -2507,7 +2585,7 @@ void BinLines( // store active attribs desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16); desc.numAttribs = linkageCount; - ProcessAttributes<2>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs); + pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs); // store line vertex data desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16); diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h index 6aa73c1ddf1..64932af6145 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa.h +++ b/src/gallium/drivers/swr/rasterizer/core/pa.h @@ -1169,15 +1169,8 @@ struct PA_FACTORY topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ))) { memset(&indexStore, 0, sizeof(indexStore)); - DWORD numAttribs; - if (_BitScanReverse(&numAttribs, state.feAttribMask)) - { - numAttribs++; - } - else - { - numAttribs = 0; - } + uint32_t numAttribs = state.feNumAttributes; + new (&this->paCut) PA_STATE_CUT(pDC, (uint8_t*)&this->vertexStore[0], MAX_NUM_VERTS_PER_PRIM * KNOB_SIMD_WIDTH, &this->indexStore[0], numVerts, numAttribs, state.topology, false); cutPA = true; diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 9fc304a8c3f..0931c82b5d2 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -938,13 +938,34 @@ struct SWR_RASTSTATE uint8_t clipDistanceMask; }; +enum SWR_CONSTANT_SOURCE +{ + SWR_CONSTANT_SOURCE_CONST_0000, + SWR_CONSTANT_SOURCE_CONST_0001_FLOAT, + SWR_CONSTANT_SOURCE_CONST_1111_FLOAT, + SWR_CONSTANT_SOURCE_PRIM_ID +}; + +struct SWR_ATTRIB_SWIZZLE +{ + uint16_t sourceAttrib : 5; // source attribute + uint16_t constantSource : 2; // constant source to apply + uint16_t componentOverrideMask : 4; // override component with constant source +}; + // backend state struct SWR_BACKEND_STATE { - uint32_t constantInterpolationMask; - uint32_t pointSpriteTexCoordMask; - uint8_t numAttributes; - uint8_t numComponents[KNOB_NUM_ATTRIBUTES]; + uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation + uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates + + uint8_t numAttributes; // total number of attributes to send to backend (up to 32) + uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components + + bool swizzleEnable; // when enabled, core will parse the swizzle map when + // setting up attributes for the backend, otherwise + // all attributes up to numAttributes will be sent + SWR_ATTRIB_SWIZZLE swizzleMap[32]; }; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h index 1d8e9a111ed..d3181cd29ec 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h @@ -80,12 +80,12 @@ enum ComponentEnable enum ComponentControl { - NoStore = 0, - StoreSrc = 1, - Store0 = 2, - Store1Fp = 3, - Store1Int = 4, - StoreVertexId = 5, + NoStore = 0, + StoreSrc = 1, + Store0 = 2, + Store1Fp = 3, + Store1Int = 4, + StoreVertexId = 5, StoreInstanceId = 6 }; diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index 4d1b604817b..ecb4545d13b 100644 --- a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -157,18 +157,6 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) { struct swr_vertex_shader *swr_vs = ctx->vs; - swr_vs->linkageMask = 0; - - for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) { - switch (swr_vs->info.base.output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - break; - default: - swr_vs->linkageMask |= (1 << i); - break; - } - } - LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 5caaa5c7139..dac95ce42e7 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -1373,16 +1373,13 @@ swr_update_derived(struct pipe_context *pipe, } } - uint32_t linkage = ctx->vs->linkageMask; - if (ctx->rasterizer->sprite_coord_enable) - linkage |= (1 << ctx->vs->info.base.num_outputs); - - SwrSetLinkage(ctx->swrContext, linkage, NULL); - // set up backend state SWR_BACKEND_STATE backendState = {0}; - backendState.numAttributes = 1; - backendState.numComponents[0] = 4; + backendState.numAttributes = + ctx->vs->info.base.num_outputs - 1 + + (ctx->rasterizer->sprite_coord_enable ? 1 : 0); + for (unsigned i = 0; i < backendState.numAttributes; i++) + backendState.numComponents[i] = 4; backendState.constantInterpolationMask = ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h index cb699641274..dcb1145a362 100644 --- a/src/gallium/drivers/swr/swr_state.h +++ b/src/gallium/drivers/swr/swr_state.h @@ -53,7 +53,6 @@ typedef ShaderVariant<PFN_PIXEL_KERNEL> VariantFS; struct swr_vertex_shader { struct pipe_shader_state pipe; struct lp_tgsi_info info; - unsigned linkageMask; std::unordered_map<swr_jit_vs_key, std::unique_ptr<VariantVS>> map; SWR_STREAMOUT_STATE soState; PFN_SO_FUNC soFunc[PIPE_PRIM_MAX] {0}; |