diff options
Diffstat (limited to 'src/gallium/drivers/swr')
5 files changed, 26 insertions, 5 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 9c1e9e0ac8f..bced6576443 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -58,7 +58,6 @@ intrinsics = [
         ['VPTESTC', ['a', 'b'], 'mInt32Ty'],
         ['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
         ['VFMADDPS', ['a', 'b', 'c'], 'a'],
-        ['VMOVMSKPS', ['a'], 'mInt32Ty'],
         ['VPHADDD', ['a', 'b'], 'a'],
         ['PDEP32', ['a', 'b'], 'a'],
         ['RDTSC', [], 'mInt64Ty'],
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index f0cd4413d3e..5b70b29afba 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -608,7 +608,7 @@ namespace SwrJit
         pSrcArrayPtr = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
         pOffsetsArrayPtr = POINTER_CAST(pOffsetsArrayPtr, PointerType::get(mInt32Ty, 0));
 
-        Value* pMask = VMOVMSKPS(BITCAST(vMask, mSimdFP32Ty));
+        Value* pMask = VMOVMSK(vMask);
 
         // Setup loop basic block
         BasicBlock* pLoop = BasicBlock::Create(mpJitMgr->mContext, "Scatter_Loop", pFunc);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index aa9e2dddee8..f8936930b7e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -525,6 +525,28 @@ namespace SwrJit
         return S_EXT(mask, mSimd16Int32Ty);
     }
 
+    /// @brief Convert an <8 x i1> or <16 x i1> llvm mask to an integer, zero-extended to i32
+    Value* Builder::VMOVMSK(Value* mask)
+    {
+        SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty);
+        uint32_t numLanes = mask->getType()->getVectorNumElements();
+        Value* packedBits; // integer image of the mask, one bit per lane, before widening
+        if (numLanes == 8)
+        {
+            packedBits = BITCAST(mask, mInt8Ty);
+        }
+        else if (numLanes == 16)
+        {
+            packedBits = BITCAST(mask, mInt16Ty);
+        }
+        else
+        {
+            SWR_ASSERT(false && "Unsupported vector width"); // a bare literal is always true
+            packedBits = BITCAST(mask, mInt8Ty); // fallback for builds where the assert is a no-op
+        }
+        return Z_EXT(packedBits, mInt32Ty);
+    }
+
     //////////////////////////////////////////////////////////////////////////
     /// @brief Generate a VPSHUFB operation in LLVM IR. If not
     /// supported on the underlying platform, emulate it
@@ -768,8 +790,7 @@ namespace SwrJit
     /// @brief pop count on vector mask (e.g. <8 x i1>)
     Value* Builder::VPOPCNT(Value* a)
     {
-        Value* b = BITCAST(VMASK(a), mSimdFP32Ty);
-        return POPCNT(VMOVMSKPS(b));
+        return POPCNT(VMOVMSK(a));
     }
 
     //////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index 7308821c89e..bd4be9ffe2a 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -102,6 +102,8 @@ Value *MASK_16(Value *vmask);
 Value *VMASK(Value *mask);
 Value *VMASK_16(Value *mask);
 
+Value *VMOVMSK(Value *mask);
+
 //////////////////////////////////////////////////////////////////////////
 /// @brief functions that build IR to call x86 intrinsics directly, or
 /// emulate them with other instructions if not available on the host
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 7cfa7724980..856d67d2bc9 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -79,7 +79,6 @@ namespace SwrJit
         {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
         {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
         {"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
-        {"meta.intrinsic.VMOVMSKPS", Intrinsic::x86_avx_movmsk_ps_256},
         {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
         {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
         {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},