aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/swr
diff options
context:
space:
mode:
author George Kyriazis <[email protected]> 2018-02-02 17:03:01 -0600
committer George Kyriazis <[email protected]> 2018-02-16 10:54:01 -0600
commit e12db47a7ddcee6f26409b5b1dd722747560597d (patch)
tree 1a6f16136979ac695af1f08604b179caafe4e896 /src/gallium/drivers/swr
parent 9cc9688e4972542fc8fcebbd6d29edb28b2fe472 (diff)
swr/rast: Use llvm intrinsic masked gather
Use the LLVM masked.gather intrinsic instead of a manual unroll for the cases where we have a vector of pointers. This improves the LLVM IR debug experience by reducing a ton of IR to a single intrinsic call. It also seems to reduce overall stack use considerably. Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 12
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h 2
2 files changed, 14 insertions, 0 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index 6e462d522f7..86fdfca392f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -346,6 +346,18 @@ namespace SwrJit
return vGather;
}
+ //////////////////////////////////////////////////////////////////////////
+ /// @brief Masked gather whose source is a vector of pointers; emits a single call to the llvm masked.gather intrinsic
+ /// @param pVecSrcPtr - SIMD wide vector of pointers, passed as the intrinsic's first operand
+ /// @param pVecMask - SIMD active lanes, passed as the intrinsic's mask operand
+ /// @param pVecPassthru - SIMD wide vector of values used for inactive lanes; its type selects the intrinsic overload
+ Value* Builder::GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru)
+ {
+ Function* pMaskedGather = llvm::Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::masked_gather, { pVecPassthru->getType() }); // declaration resolved against JM()->mpCurrentModule, overloaded on the passthru type
+
+ return CALL(pMaskedGather, { pVecSrcPtr, C(0), pVecMask, pVecPassthru }); // returns the CALL result; C(0) fills the operand between pointers and mask (alignment per LLVM LangRef - TODO confirm 0 is intended)
+ }
+
void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
Value* mask, Value* vGatherComponents[], bool bPackedOutput)
{
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
index c2279a62d98..f31cb4abae0 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@@ -58,6 +58,8 @@ virtual void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byte
Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
+Value *GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru); // masked gather from a vector of pointers; defined in builder_mem.cpp
+
void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);
void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput);