diff options
author | Tim Rowley <[email protected]> | 2017-01-05 07:29:22 -0600 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2017-01-05 14:10:36 -0600 |
commit | 33fa4c99f7fa68fd8c33c75c4fe66c4cca76779f (patch) | |
tree | 5662d35a22a18cd4c068c9b42d478c69d0e219bd /src | |
parent | b6670157d742548e7f2430614786c733eb4c20e9 (diff) |
swr: [rasterizer core/common/jitter] gl_double support
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99214
Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src')
9 files changed, 341 insertions, 33 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/common/formats.cpp b/src/gallium/drivers/swr/rasterizer/common/formats.cpp index aba4c3f3380..72020ee1936 100644 --- a/src/gallium/drivers/swr/rasterizer/common/formats.cpp +++ b/src/gallium/drivers/swr/rasterizer/common/formats.cpp @@ -149,16 +149,26 @@ const SWR_FORMAT_INFO gFormatInfo[] = { { 0.0f, 0.0f, 0.0f, 0.0f }, 1, 1 }, - // padding (0x5) + // R64G64_FLOAT (0x5) { - nullptr, - { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN }, - { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 }, - 0, 0, 0, false, false, false, false, - { false, false, false, false }, - { 0.0f, 0.0f, 0.0f, 0.0f }, - 1, 1 + "R64G64_FLOAT", + { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN }, + { 0, 0, 0, 0x3f800000 }, // Defaults for missing components + { 0, 1, 0, 0 }, // Swizzle + { 64, 64, 0, 0 }, // Bits per component + 128, // Bits per element + 16, // Bytes per element + 2, // Num components + false, // isSRGB + false, // isBC + false, // isSubsampled + false, // isLuminance + { false, false, false, false }, // Is normalized? + { 1.0f, 1.0f, 0, 0 }, // To float scale factor + 1, // bcWidth + 1, // bcHeight }, + // R32G32B32X32_FLOAT (0x6) { "R32G32B32X32_FLOAT", @@ -1719,16 +1729,26 @@ const SWR_FORMAT_INFO gFormatInfo[] = { { 0.0f, 0.0f, 0.0f, 0.0f }, 1, 1 }, - // padding (0x8D) + // R64_FLOAT (0x8D) { - nullptr, - { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN }, - { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 }, - 0, 0, 0, false, false, false, false, - { false, false, false, false }, - { 0.0f, 0.0f, 0.0f, 0.0f }, - 1, 1 + "R64_FLOAT", + { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN }, + { 0, 0, 0, 0x3f800000 }, // Defaults for missing components + { 0, 0, 0, 0 }, // Swizzle + { 64, 0, 0, 0 }, // Bits per component + 64, // Bits per element + 8, // Bytes per element + 1, // Num components + false, // isSRGB + false, // isBC + false, // isSubsampled + false, // isLuminance + { false, false, false, false }, // Is normalized? + { 1.0f, 0, 0, 0 }, // To float scale factor + 1, // bcWidth + 1, // bcHeight }, + // R16G16B16X16_UNORM (0x8E) { "R16G16B16X16_UNORM", @@ -5529,26 +5549,46 @@ const SWR_FORMAT_INFO gFormatInfo[] = { 1, // bcHeight }, - // padding (0x197) + // R64G64B64A64_FLOAT (0x197) { - nullptr, - { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN }, - { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 }, - 0, 0, 0, false, false, false, false, - { false, false, false, false }, - { 0.0f, 0.0f, 0.0f, 0.0f }, - 1, 1 + "R64G64B64A64_FLOAT", + { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT }, + { 0, 0, 0, 0x3f800000 }, // Defaults for missing components + { 0, 1, 2, 3 }, // Swizzle + { 64, 64, 64, 64 }, // Bits per component + 256, // Bits per element + 32, // Bytes per element + 4, // Num components + false, // isSRGB + false, // isBC + false, // isSubsampled + false, // isLuminance + { false, false, false, false }, // Is normalized? + { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor + 1, // bcWidth + 1, // bcHeight }, - // padding (0x198) + + // R64G64B64_FLOAT (0x198) { - nullptr, - { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN }, - { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 }, - 0, 0, 0, false, false, false, false, - { false, false, false, false }, - { 0.0f, 0.0f, 0.0f, 0.0f }, - 1, 1 + "R64G64B64_FLOAT", + { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN }, + { 0, 0, 0, 0x3f800000 }, // Defaults for missing components + { 0, 1, 2, 0 }, // Swizzle + { 64, 64, 64, 0 }, // Bits per component + 192, // Bits per element + 24, // Bytes per element + 3, // Num components + false, // isSRGB + false, // isBC + false, // isSubsampled + false, // isLuminance + { false, false, false, false }, // Is normalized? + { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor + 1, // bcWidth + 1, // bcHeight }, + // BC4_SNORM (0x199) { "BC4_SNORM", diff --git a/src/gallium/drivers/swr/rasterizer/common/formats.h b/src/gallium/drivers/swr/rasterizer/common/formats.h index 3d686d3e0b0..0056a56f829 100644 --- a/src/gallium/drivers/swr/rasterizer/common/formats.h +++ b/src/gallium/drivers/swr/rasterizer/common/formats.h @@ -57,6 +57,7 @@ enum SWR_FORMAT R32G32B32A32_FLOAT = 0x0, R32G32B32A32_SINT = 0x1, R32G32B32A32_UINT = 0x2, + R64G64_FLOAT = 0x5, R32G32B32X32_FLOAT = 0x6, R32G32B32A32_SSCALED = 0x7, R32G32B32A32_USCALED = 0x8, @@ -78,6 +79,7 @@ enum SWR_FORMAT R32_FLOAT_X8X24_TYPELESS = 0x88, X32_TYPELESS_G8X24_UINT = 0x89, L32A32_FLOAT = 0x8A, + R64_FLOAT = 0x8D, R16G16B16X16_UNORM = 0x8E, R16G16B16X16_FLOAT = 0x8F, L32X32_FLOAT = 0x91, @@ -193,6 +195,8 @@ enum SWR_FORMAT R8G8B8_SNORM = 0x194, R8G8B8_SSCALED = 0x195, R8G8B8_USCALED = 0x196, + R64G64B64A64_FLOAT = 0x197, + R64G64B64_FLOAT = 0x198, BC4_SNORM = 0x199, BC5_SNORM = 0x19A, R16G16B16_FLOAT = 0x19B, diff --git a/src/gallium/drivers/swr/rasterizer/core/format_traits.h b/src/gallium/drivers/swr/rasterizer/core/format_traits.h index 59d4e7d7e9f..6c428043878 100644 --- a/src/gallium/drivers/swr/rasterizer/core/format_traits.h +++ b/src/gallium/drivers/swr/rasterizer/core/format_traits.h @@ -134,6 +134,28 @@ template<> struct FormatTraits<R32G32B32A32_UINT> : }; ////////////////////////////////////////////////////////////////////////// +/// FormatTraits<R64G64_FLOAT> - Format traits specialization for R64G64_FLOAT +////////////////////////////////////////////////////////////////////////// +template<> struct FormatTraits<R64G64_FLOAT> : + ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>, + FormatSwizzle<0, 1>, + Defaults<0, 0, 0, 0x3f800000> +{ + static const uint32_t bpp{ 128 }; + static const uint32_t numComps{ 2 }; + static const bool hasAlpha{ false }; + static const uint32_t alphaComp{ 0 }; + static const bool isSRGB{ false }; + static const bool isBC{ false }; + static const bool isSubsampled{ false }; + static const uint32_t bcWidth{ 1 }; + static const uint32_t bcHeight{ 1 }; + + typedef Transpose64_64 TransposeT; + typedef Format2<64, 64> FormatT; +}; + +////////////////////////////////////////////////////////////////////////// /// FormatTraits<R32G32B32X32_FLOAT> - Format traits specialization for R32G32B32X32_FLOAT ////////////////////////////////////////////////////////////////////////// template<> struct FormatTraits<R32G32B32X32_FLOAT> : @@ -596,6 +618,28 @@ template<> struct FormatTraits<L32A32_FLOAT> : }; ////////////////////////////////////////////////////////////////////////// +/// FormatTraits<R64_FLOAT> - Format traits specialization for R64_FLOAT +////////////////////////////////////////////////////////////////////////// +template<> struct FormatTraits<R64_FLOAT> : + ComponentTraits<SWR_TYPE_FLOAT, 64>, + FormatSwizzle<0>, + Defaults<0, 0, 0, 0x3f800000> +{ + static const uint32_t bpp{ 64 }; + static const uint32_t numComps{ 1 }; + static const bool hasAlpha{ false }; + static const uint32_t alphaComp{ 0 }; + static const bool isSRGB{ false }; + static const bool isBC{ false }; + static const bool isSubsampled{ false }; + static const uint32_t bcWidth{ 1 }; + static const uint32_t bcHeight{ 1 }; + + typedef TransposeSingleComponent<64> TransposeT; + typedef Format1<64> FormatT; +}; + +////////////////////////////////////////////////////////////////////////// /// FormatTraits<R16G16B16X16_UNORM> - Format traits specialization for R16G16B16X16_UNORM ////////////////////////////////////////////////////////////////////////// template<> struct FormatTraits<R16G16B16X16_UNORM> : @@ -3126,6 +3170,50 @@ template<> struct FormatTraits<R8G8B8_USCALED> : }; ////////////////////////////////////////////////////////////////////////// +/// FormatTraits<R64G64B64A64_FLOAT> - Format traits specialization for R64G64B64A64_FLOAT +////////////////////////////////////////////////////////////////////////// +template<> struct FormatTraits<R64G64B64A64_FLOAT> : + ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>, + FormatSwizzle<0, 1, 2, 3>, + Defaults<0, 0, 0, 0x3f800000> +{ + static const uint32_t bpp{ 256 }; + static const uint32_t numComps{ 4 }; + static const bool hasAlpha{ true }; + static const uint32_t alphaComp{ 3 }; + static const bool isSRGB{ false }; + static const bool isBC{ false }; + static const bool isSubsampled{ false }; + static const uint32_t bcWidth{ 1 }; + static const uint32_t bcHeight{ 1 }; + + typedef Transpose64_64_64_64 TransposeT; + typedef Format4<64, 64, 64, 64> FormatT; +}; + +////////////////////////////////////////////////////////////////////////// +/// FormatTraits<R64G64B64_FLOAT> - Format traits specialization for R64G64B64_FLOAT +////////////////////////////////////////////////////////////////////////// +template<> struct FormatTraits<R64G64B64_FLOAT> : + ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>, + FormatSwizzle<0, 1, 2>, + Defaults<0, 0, 0, 0x3f800000> +{ + static const uint32_t bpp{ 192 }; + static const uint32_t numComps{ 3 }; + static const bool hasAlpha{ false }; + static const uint32_t alphaComp{ 0 }; + static const bool isSRGB{ false }; + static const bool isBC{ false }; + static const bool isSubsampled{ false }; + static const uint32_t bcWidth{ 1 }; + static const uint32_t bcHeight{ 1 }; + + typedef Transpose64_64_64 TransposeT; + typedef Format3<64, 64, 64> FormatT; +}; + +////////////////////////////////////////////////////////////////////////// /// FormatTraits<BC4_SNORM> - Format traits specialization for BC4_SNORM ////////////////////////////////////////////////////////////////////////// template<> struct FormatTraits<BC4_SNORM> : diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h index a2365757a7f..0e2cb47f2bd 100644 --- a/src/gallium/drivers/swr/rasterizer/core/utils.h +++ b/src/gallium/drivers/swr/rasterizer/core/utils.h @@ -856,6 +856,70 @@ struct Transpose11_11_10 #endif }; +////////////////////////////////////////////////////////////////////////// +/// Transpose64 +////////////////////////////////////////////////////////////////////////// +struct Transpose64 +{ + ////////////////////////////////////////////////////////////////////////// + /// @brief Performs an SOA to AOS conversion + /// @param pSrc - source data in SOA form + /// @param pDst - output data in AOS form + static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete; +#if ENABLE_AVX512_SIMD16 + + static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete; +#endif +}; + +////////////////////////////////////////////////////////////////////////// +/// Transpose64_64 +////////////////////////////////////////////////////////////////////////// +struct Transpose64_64 +{ + ////////////////////////////////////////////////////////////////////////// + /// @brief Performs an SOA to AOS conversion + /// @param pSrc - source data in SOA form + /// @param pDst - output data in AOS form + static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete; +#if ENABLE_AVX512_SIMD16 + + static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete; +#endif +}; + +////////////////////////////////////////////////////////////////////////// +/// Transpose64_64_64 +////////////////////////////////////////////////////////////////////////// +struct Transpose64_64_64 +{ + ////////////////////////////////////////////////////////////////////////// + /// @brief Performs an SOA to AOS conversion + /// @param pSrc - source data in SOA form + /// @param pDst - output data in AOS form + static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete; +#if ENABLE_AVX512_SIMD16 + + static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete; +#endif +}; + +////////////////////////////////////////////////////////////////////////// +/// Transpose64_64_64_64 +////////////////////////////////////////////////////////////////////////// +struct Transpose64_64_64_64 +{ + ////////////////////////////////////////////////////////////////////////// + /// @brief Performs an SOA to AOS conversion + /// @param pSrc - source data in SOA form + /// @param pDst - output data in AOS form + static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete; +#if ENABLE_AVX512_SIMD16 + + static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete; +#endif +}; + // helper function to unroll loops template<int Begin, int End, int Step = 1> struct UnrollerL { diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 8120a2f21ed..8744eb6554d 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -632,6 +632,55 @@ namespace SwrJit } ////////////////////////////////////////////////////////////////////////// + /// @brief Generate a masked gather operation in LLVM IR. If not + /// supported on the underlying platform, emulate it with loads + /// @param vSrc - SIMD wide value that will be loaded if mask is invalid + /// @param pBase - Int8* base VB address pointer value + /// @param vIndices - SIMD wide value of VB byte offsets + /// @param vMask - SIMD wide mask that controls whether to access memory or the src values + /// @param scale - value to scale indices by + Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, Value* scale) + { + Value* vGather; + + // use avx2 gather instruction if available + if(JM()->mArch.AVX2()) + { + vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, scale); + } + else + { + Value* pStack = STACKSAVE(); + + // store vSrc on the stack. this way we can select between a valid load address and the vSrc address + Value* vSrcPtr = ALLOCA(vSrc->getType()); + STORE(vSrc, vSrcPtr); + + vGather = UndefValue::get(VectorType::get(mDoubleTy, 4)); + Value *vScaleVec = VECTOR_SPLAT(4, Z_EXT(scale,mInt32Ty)); + Value *vOffsets = MUL(vIndices,vScaleVec); + Value *mask = MASK(vMask); + for(uint32_t i = 0; i < mVWidth/2; ++i) + { + // single component byte index + Value *offset = VEXTRACT(vOffsets,C(i)); + // byte pointer to component + Value *loadAddress = GEP(pBase,offset); + loadAddress = BITCAST(loadAddress,PointerType::get(mDoubleTy,0)); + // pointer to the value to load if we're masking off a component + Value *maskLoadAddress = GEP(vSrcPtr,{C(0), C(i)}); + Value *selMask = VEXTRACT(mask,C(i)); + // switch in a safe address to load if we're trying to access a vertex + Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress); + Value *val = LOAD(validAddress); + vGather = VINSERT(vGather,val,C(i)); + } + STACKRESTORE(pStack); + } + return vGather; + } + + ////////////////////////////////////////////////////////////////////////// /// @brief convert x86 <N x float> mask to llvm <N x i1> mask Value* Builder::MASK(Value* vmask) { diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h index 86f7bf21a6b..67f938e192e 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h @@ -113,6 +113,8 @@ Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, Value* sc void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets, Value* mask, Value* vGatherComponents[], bool bPackedOutput); +Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, Value* scale); + void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask); void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index c5936e59bfe..984aab67cd6 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -519,7 +519,7 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str bool FetchJit::IsOddFormat(SWR_FORMAT format) { const SWR_FORMAT_INFO& info = GetFormatInfo(format); - if (info.bpc[0] != 8 && info.bpc[0] != 16 && info.bpc[0] != 32) + if (info.bpc[0] != 8 && info.bpc[0] != 16 && info.bpc[0] != 32 && info.bpc[0] != 64) { return true; } @@ -914,6 +914,58 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, } } break; + case 64: + { + for (uint32_t i = 0; i < 4; i++) + { + if (isComponentEnabled(compMask, i)) + { + // if we need to gather the component + if (compCtrl[i] == StoreSrc) + { + Value *vMaskLo = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3})); + Value *vMaskHi = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7})); + vMaskLo = S_EXT(vMaskLo, VectorType::get(mInt64Ty, 4)); + vMaskHi = S_EXT(vMaskHi, VectorType::get(mInt64Ty, 4)); + vMaskLo = BITCAST(vMaskLo, VectorType::get(mDoubleTy, 4)); + vMaskHi = BITCAST(vMaskHi, VectorType::get(mDoubleTy, 4)); + + Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0)); + Value *vOffsetsHi = VEXTRACTI128(vOffsets, C(1)); + + Value *vZeroDouble = VECTOR_SPLAT(4, ConstantFP::get(IRB()->getDoubleTy(), 0.0f)); + + Value* pGatherLo = GATHERPD(vZeroDouble, + pStreamBase, vOffsetsLo, vMaskLo, C((char)1)); + Value* pGatherHi = GATHERPD(vZeroDouble, + pStreamBase, vOffsetsHi, vMaskHi, C((char)1)); + + pGatherLo = VCVTPD2PS(pGatherLo); + pGatherHi = VCVTPD2PS(pGatherHi); + + Value *pGather = VSHUFFLE(pGatherLo, pGatherHi, C({0, 1, 2, 3, 4, 5, 6, 7})); + + vVertexElements[currentVertexElement++] = pGather; + } + else + { + vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]); + } + + if (currentVertexElement > 3) + { + StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements); + // reset to the next vVertexElement to output + currentVertexElement = 0; + } + + } + + // offset base to the next component in the vertex to gather + pStreamBase = GEP(pStreamBase, C((char)8)); + } + } + break; default: SWR_ASSERT(0, "Tried to fetch invalid FP format"); break; @@ -1730,6 +1782,8 @@ PFN_FETCH_FUNC JitFetchFunc(HANDLE hJitMgr, const HANDLE hFunc) fclose(fd); #endif + pJitMgr->DumpAsm(const_cast<llvm::Function*>(func), "final"); + return pfnFetch; } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py index c6d09413211..70d35762bc5 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py @@ -84,6 +84,7 @@ inst_aliases = { } intrinsics = [ + ["VGATHERPD", "x86_avx2_gather_d_pd_256", ["src", "pBase", "indices", "mask", "scale"]], ["VGATHERPS", "x86_avx2_gather_d_ps_256", ["src", "pBase", "indices", "mask", "scale"]], ["VGATHERDD", "x86_avx2_gather_d_d_256", ["src", "pBase", "indices", "mask", "scale"]], ["VSQRTPS", "x86_avx_sqrt_ps_256", ["a"]], @@ -101,6 +102,7 @@ intrinsics = [ ["VPSHUFB", "x86_avx2_pshuf_b", ["a", "b"]], ["VPERMD", "x86_avx2_permd", ["a", "idx"]], ["VPERMPS", "x86_avx2_permps", ["idx", "a"]], + ["VCVTPD2PS", "x86_avx_cvt_pd2_ps_256", ["a"]], ["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]], ["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]], ["VHSUBPS", "x86_avx_hsub_ps_256", ["a", "b"]], diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index 6ff21cdf186..cc8030eaff6 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -537,6 +537,11 @@ mesa_to_swr_format(enum pipe_format format) {PIPE_FORMAT_R32G32B32_FIXED, R32G32B32_SFIXED}, {PIPE_FORMAT_R32G32B32A32_FIXED, R32G32B32A32_SFIXED}, + {PIPE_FORMAT_R64_FLOAT, R64_FLOAT}, + {PIPE_FORMAT_R64G64_FLOAT, R64G64_FLOAT}, + {PIPE_FORMAT_R64G64B64_FLOAT, R64G64B64_FLOAT}, + {PIPE_FORMAT_R64G64B64A64_FLOAT, R64G64B64A64_FLOAT}, + /* These formats have entries in SWR but don't have Load/StoreTile * implementations. That means these aren't renderable, and thus having * a mapping entry here is detrimental. |