diff options
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 41 |
1 files changed, 26 insertions, 15 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 3da0e4064d9..986eced15f7 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -78,7 +78,7 @@ struct FetchJit : public Builder bool IsOddFormat(SWR_FORMAT format); bool IsUniformFormat(SWR_FORMAT format); void UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[4]); - void CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* offsets, Value* result[4]); + void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]); void ConvertFormat(SWR_FORMAT format, Value *texels[4]); Value* mpFetchInfo; @@ -562,7 +562,7 @@ void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[ // gather for odd component size formats // gather SIMD full pixels per lane then shift/mask to move each component to their // own vector -void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* offsets, Value* result[4]) +void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]) { const SWR_FORMAT_INFO &info = GetFormatInfo(format); @@ -577,24 +577,34 @@ void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* of result[comp] = VIMMED1((int)info.defaults[comp]); } + // load the proper amount of data based on component size + PointerType* pLoadTy = nullptr; + switch (info.bpp) + { + case 8: pLoadTy = Type::getInt8PtrTy(JM()->mContext); break; + case 16: pLoadTy = Type::getInt16PtrTy(JM()->mContext); break; + case 24: + case 32: pLoadTy = Type::getInt32PtrTy(JM()->mContext); break; + default: SWR_ASSERT(0); + } + + // allocate temporary memory for masked off lanes + Value* pTmp = ALLOCA(pLoadTy->getElementType()); + // gather SIMD pixels for (uint32_t e = 0; e < JM()->mVWidth; ++e) { - Value* elemOffset = VEXTRACT(offsets, C(e)); - Value* load = GEP(pBase, elemOffset); + Value* pElemOffset = VEXTRACT(offsets, C(e)); + Value* pLoad = GEP(pBase, pElemOffset); + Value* pLaneMask = VEXTRACT(pMask, C(e)); - // load the proper amount of data based on component size - switch (info.bpp) - { - case 8: load = POINTER_CAST(load, Type::getInt8PtrTy(JM()->mContext)); break; - case 16: load = POINTER_CAST(load, Type::getInt16PtrTy(JM()->mContext)); break; - case 24: - case 32: load = POINTER_CAST(load, Type::getInt32PtrTy(JM()->mContext)); break; - default: SWR_ASSERT(0); - } + pLoad = POINTER_CAST(pLoad, pLoadTy); + + // mask in tmp pointer for disabled lanes + pLoad = SELECT(pLaneMask, pLoad, pTmp); // load pixel - Value *val = LOAD(load); + Value *val = LOAD(pLoad); // zero extend to 32bit integer val = INT_CAST(val, mInt32Ty, false); @@ -780,6 +790,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, // blend in any partially OOB indices that have valid elements vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask); + Value* pMask = vGatherMask; vGatherMask = VMASK(vGatherMask); // calculate the actual offsets into the VB @@ -795,7 +806,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, if (IsOddFormat((SWR_FORMAT)ied.Format)) { Value* pResults[4]; - CreateGatherOddFormats((SWR_FORMAT)ied.Format, pStreamBase, vOffsets, pResults); + CreateGatherOddFormats((SWR_FORMAT)ied.Format, pMask, pStreamBase, vOffsets, pResults); ConvertFormat((SWR_FORMAT)ied.Format, pResults); for (uint32_t c = 0; c < 4; ++c) |