summary refs log tree commit diff stats
path: root/src/gallium/drivers/swr
diff options
context:
space:
mode:
author Tim Rowley <[email protected]> 2017-04-21 13:35:55 -0500
committer Tim Rowley <[email protected]> 2017-04-28 19:57:02 -0500
commit a46539af1102c087e6024c3c276ea5e7575d1108 (patch)
tree b24edf8c848147a5362ff0a06d8c8eaeba979895 /src/gallium/drivers/swr
parent eff909de7d25155dbb37118d2d4c9b129f131f29 (diff)
swr/rast: use gather instruction for odd format fetch
Small fetch performance optimization - use gather instruction for odd format fetch instead of slow emulated code.
Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 55
1 file changed, 9 insertions, 46 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 8fc31ae9c6c..50c2e9b6bf0 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -590,64 +590,27 @@ void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[
// gather for odd component size formats
// gather SIMD full pixels per lane then shift/mask to move each component to their
// own vector
-void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4])
+void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* pOffsets, Value* pResult[4])
{
const SWR_FORMAT_INFO &info = GetFormatInfo(format);
// only works if pixel size is <= 32bits
SWR_ASSERT(info.bpp <= 32);
- Value* gather = VUNDEF_I();
+ Value* pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask, C((char)1));
- // assign defaults
for (uint32_t comp = 0; comp < 4; ++comp)
{
- result[comp] = VIMMED1((int)info.defaults[comp]);
+ pResult[comp] = VIMMED1((int)info.defaults[comp]);
}
- // load the proper amount of data based on component size
- PointerType* pLoadTy = nullptr;
- switch (info.bpp)
- {
- case 8: pLoadTy = Type::getInt8PtrTy(JM()->mContext); break;
- case 16: pLoadTy = Type::getInt16PtrTy(JM()->mContext); break;
- case 24:
- case 32: pLoadTy = Type::getInt32PtrTy(JM()->mContext); break;
- default: SWR_INVALID("Invalid bpp: %d", info.bpp);
- }
-
- // allocate temporary memory for masked off lanes
- Value* pTmp = ALLOCA(pLoadTy->getElementType());
-
- // gather SIMD pixels
- for (uint32_t e = 0; e < JM()->mVWidth; ++e)
- {
- Value* pElemOffset = VEXTRACT(offsets, C(e));
- Value* pLoad = GEP(pBase, pElemOffset);
- Value* pLaneMask = VEXTRACT(pMask, C(e));
-
- pLoad = POINTER_CAST(pLoad, pLoadTy);
-
- // mask in tmp pointer for disabled lanes
- pLoad = SELECT(pLaneMask, pLoad, pTmp);
-
- // load pixel
- Value *val = LOAD(pLoad);
-
- // zero extend to 32bit integer
- val = INT_CAST(val, mInt32Ty, false);
-
- // store in simd lane
- gather = VINSERT(gather, val, C(e));
- }
-
- UnpackComponents(format, gather, result);
+ UnpackComponents(format, pGather, pResult);
// cast to fp32
- result[0] = BITCAST(result[0], mSimdFP32Ty);
- result[1] = BITCAST(result[1], mSimdFP32Ty);
- result[2] = BITCAST(result[2], mSimdFP32Ty);
- result[3] = BITCAST(result[3], mSimdFP32Ty);
+ pResult[0] = BITCAST(pResult[0], mSimdFP32Ty);
+ pResult[1] = BITCAST(pResult[1], mSimdFP32Ty);
+ pResult[2] = BITCAST(pResult[2], mSimdFP32Ty);
+ pResult[3] = BITCAST(pResult[3], mSimdFP32Ty);
}
void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
@@ -860,7 +823,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
if (IsOddFormat((SWR_FORMAT)ied.Format))
{
Value* pResults[4];
- CreateGatherOddFormats((SWR_FORMAT)ied.Format, pMask, pStreamBase, vOffsets, pResults);
+ CreateGatherOddFormats((SWR_FORMAT)ied.Format, vGatherMask, pStreamBase, vOffsets, pResults);
ConvertFormat((SWR_FORMAT)ied.Format, pResults);
for (uint32_t c = 0; c < 4; ++c)