diff options
Diffstat (limited to 'src/gallium/drivers/swr')
4 files changed, 25 insertions, 12 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h index 29151682e07..a160ca2c5ed 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h +++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h @@ -126,7 +126,7 @@ typedef SIMD512 SIMD16; #define _simd16_add_epi8 SIMD16::add_epi8 #define _simd16_shuffle_epi8 SIMD16::shuffle_epi8 -#define _simd16_i32gather_ps(m, index, scale) SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(index, m) +#define _simd16_i32gather_ps(m, index, scale) SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(m, index) #define _simd16_mask_i32gather_ps(a, m, index, mask, scale) SIMD16::mask_i32gather_ps<SIMD16::ScaleFactor(scale)>(a, m, index, mask) #define _simd16_abs_epi32 SIMD16::abs_epi32 @@ -162,6 +162,18 @@ typedef SIMD512 SIMD16; #define _simd16_int2mask(mask) simd16mask(mask) #define _simd16_mask2int(mask) int(mask) +// convert bitmask to vector mask +SIMDINLINE simd16scalar vMask16(int32_t mask) +{ + simd16scalari temp = _simd16_set1_epi32(mask); + + simd16scalari bits = _simd16_set_epi32(0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0040, 0x0020, 0x0010, 0x0008, 0x0004, 0x0002, 0x0001); + + simd16scalari result = _simd16_cmplt_epi32(_simd16_setzero_si(), _simd16_and_si(temp, bits)); + + return _simd16_castsi_ps(result); +} + #endif//ENABLE_AVX512_SIMD16 #endif//__SWR_SIMD16INTRIN_H_ diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl index 226952e282e..535e4ed8ca0 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl @@ -543,7 +543,7 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In { __mmask16 k = _mm512_cmpneq_ps_mask(mask, setzero_ps()); - return _mm512_mask_i32gather_ps(old, k, idx, p, ScaleT); + return _mm512_mask_i32gather_ps(old, k, idx, p, static_cast<int>(ScaleT)); } static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 34789cf0356..de6691b4cf8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -1076,13 +1076,14 @@ void SIMDCALL BinTriangles_simd16( (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, EdgeValToEdgeState(ALL_EDGES_VALID), (state.scissorsTileAligned == false)); } + simd16BBox bbox; + if (!triMask) { goto endBinTriangles; } // Calc bounding box of triangles - simd16BBox bbox; calcBoundingBoxIntVertical<CT>(tri, vXi, vYi, bbox); // determine if triangle falls between pixel centers and discard @@ -2102,7 +2103,7 @@ void SIMDCALL BinPoints_simd16( // OOB indices => forced to zero. simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); - vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai) + vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai); simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports); viewportIdx = _simd16_and_si(vClearMask, vpai); @@ -2461,6 +2462,13 @@ void BinPostSetupLines_simd16( const simdscalar unused = _simd_setzero_ps(); + // transpose verts needed for backend + /// @todo modify BE to take non-transformed verts + simd4scalar vHorizX[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH + simd4scalar vHorizY[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH + simd4scalar vHorizZ[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH + simd4scalar vHorizW[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH + if (!primMask) { goto endBinLines; @@ -2479,13 +2487,6 @@ void BinPostSetupLines_simd16( _simd16_store_si(reinterpret_cast<simd16scalari *>(aMTTop), bbox.ymin); _simd16_store_si(reinterpret_cast<simd16scalari *>(aMTBottom), bbox.ymax); - // transpose verts needed for backend - /// @todo modify BE to take non-transformed verts - simd4scalar vHorizX[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH - simd4scalar vHorizY[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH - simd4scalar vHorizZ[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH - simd4scalar vHorizW[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH - vTranspose3x8(vHorizX[0], _simd16_extract_ps(prim[0].x, 0), _simd16_extract_ps(prim[1].x, 0), unused); vTranspose3x8(vHorizY[0], _simd16_extract_ps(prim[0].y, 0), _simd16_extract_ps(prim[1].y, 0), unused); vTranspose3x8(vHorizZ[0], _simd16_extract_ps(prim[0].z, 0), _simd16_extract_ps(prim[1].z, 0), unused); diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 8a4fe6dcfa0..36c84025957 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -1095,7 +1095,7 @@ public: AR_BEGIN(FEGuardbandClip, pa.pDC->drawId); // we have to clip tris, execute the clipper, which will also // call the binner - ClipSimd(_simd16_vmask_ps(primMask), _simd16_vmask_ps(clipMask), pa, primId); + ClipSimd(vMask(primMask), vMask(clipMask), pa, primId); AR_END(FEGuardbandClip, 1); } else if (validMask) |