summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp41
1 files changed, 26 insertions, 15 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 3da0e4064d9..986eced15f7 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -78,7 +78,7 @@ struct FetchJit : public Builder
bool IsOddFormat(SWR_FORMAT format);
bool IsUniformFormat(SWR_FORMAT format);
void UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[4]);
- void CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* offsets, Value* result[4]);
+ void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
void ConvertFormat(SWR_FORMAT format, Value *texels[4]);
Value* mpFetchInfo;
@@ -562,7 +562,7 @@ void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[
// gather for odd component size formats
// gather SIMD full pixels per lane then shift/mask to move each component to their
// own vector
-void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* offsets, Value* result[4])
+void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4])
{
const SWR_FORMAT_INFO &info = GetFormatInfo(format);
@@ -577,24 +577,34 @@ void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pBase, Value* of
result[comp] = VIMMED1((int)info.defaults[comp]);
}
+ // choose the load pointer type from info.bpp; 24bpp formats share the 32-bit load
+ PointerType* pLoadTy = nullptr;
+ switch (info.bpp)
+ {
+ case 8: pLoadTy = Type::getInt8PtrTy(JM()->mContext); break;
+ case 16: pLoadTy = Type::getInt16PtrTy(JM()->mContext); break;
+ case 24:
+ case 32: pLoadTy = Type::getInt32PtrTy(JM()->mContext); break;
+ default: SWR_ASSERT(0);
+ }
+
+ // allocate temporary memory for masked off lanes
+ Value* pTmp = ALLOCA(pLoadTy->getElementType());
+
// gather SIMD pixels
for (uint32_t e = 0; e < JM()->mVWidth; ++e)
{
- Value* elemOffset = VEXTRACT(offsets, C(e));
- Value* load = GEP(pBase, elemOffset);
+ Value* pElemOffset = VEXTRACT(offsets, C(e));
+ Value* pLoad = GEP(pBase, pElemOffset);
+ Value* pLaneMask = VEXTRACT(pMask, C(e));
- // load the proper amount of data based on component size
- switch (info.bpp)
- {
- case 8: load = POINTER_CAST(load, Type::getInt8PtrTy(JM()->mContext)); break;
- case 16: load = POINTER_CAST(load, Type::getInt16PtrTy(JM()->mContext)); break;
- case 24:
- case 32: load = POINTER_CAST(load, Type::getInt32PtrTy(JM()->mContext)); break;
- default: SWR_ASSERT(0);
- }
+ pLoad = POINTER_CAST(pLoad, pLoadTy);
+
+ // disabled lanes load from pTmp instead of pBase + offset
+ pLoad = SELECT(pLaneMask, pLoad, pTmp);
// load pixel
- Value *val = LOAD(load);
+ Value *val = LOAD(pLoad);
// zero extend to 32bit integer
val = INT_CAST(val, mInt32Ty, false);
@@ -780,6 +790,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
// blend in any partially OOB indices that have valid elements
vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask);
+ Value* pMask = vGatherMask;
vGatherMask = VMASK(vGatherMask);
// calculate the actual offsets into the VB
@@ -795,7 +806,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
if (IsOddFormat((SWR_FORMAT)ied.Format))
{
Value* pResults[4];
- CreateGatherOddFormats((SWR_FORMAT)ied.Format, pStreamBase, vOffsets, pResults);
+ CreateGatherOddFormats((SWR_FORMAT)ied.Format, pMask, pStreamBase, vOffsets, pResults);
ConvertFormat((SWR_FORMAT)ied.Format, pResults);
for (uint32_t c = 0; c < 4; ++c)