diff options
author | Alok Hota <[email protected]> | 2018-08-23 18:42:25 -0500 |
---|---|---|
committer | Alok Hota <[email protected]> | 2019-01-16 13:53:30 -0600 |
commit | 9459863dfa16071ff5088a15d853028f2865c4a7 (patch) | |
tree | c7473f02d044096d811a8e26b8fc1a888210cb45 | |
parent | 9cacf9d8772fe8efd8bdded902b1b2f9b2c6e1cd (diff) |
swr/rast: partial support for Tiled Resources
- updated sampling from TRTT surfaces to work correctly
- implemented mapped status return for TRTT surfaces
- implemented per-sample instruction minLod clamp
- updated bilinear filter weight calculation to be closer to D3D specs
- implemented "ReducedTexcoordRange" operation from D3D specs to avoid
loss of precision on high-value normalized coordinates
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp | 199 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h | 26 |
2 files changed, 225 insertions, 0 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index 26d8688f5e9..65eec4e4c68 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -765,6 +765,205 @@ namespace SwrJit
     Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); }
 
     //////////////////////////////////////////////////////////////////////////
+    /// @brief Float / Fixed-point conversions
+    ///
+    /// These helpers work on SIMD vectors with 32-bit lanes. The fractional
+    /// part of a fixed-point value lives in the low numFracBits bits of each
+    /// lane and the integer part in the numIntBits bits directly above it.
+    //////////////////////////////////////////////////////////////////////////
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Convert a SIMD vector of floats to signed fixed-point
+    /// @param vFloat - float values to convert
+    /// @param numIntBits - number of integer bits in the fixed-point format
+    /// @param numFracBits - number of fractional bits in the fixed-point format
+    /// @param name - name given to the final shift that produces the result
+    /// @return 32-bit integer lanes holding the fixed-point values
+    Value* Builder::VCVT_F32_FIXED_SI(Value*             vFloat,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+
+        // Build the scale from a 64-bit shift: (1 << numFracBits) on a plain
+        // int overflows once numFracBits reaches 31.
+        const float scale = float(1ull << numFracBits);
+
+        Value* fixed = nullptr;
+        if constexpr (false) // This doesn't work for negative numbers!!
+        {
+            fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(scale)), C(_MM_FROUND_TO_NEAREST_INT)),
+                             mSimdInt32Ty);
+        }
+        else
+        {
+            // Do round to nearest int on fractional bits first
+            // Not entirely perfect for negative numbers, but close enough
+            vFloat = VROUND(FMUL(vFloat, VIMMED1(scale)), C(_MM_FROUND_TO_NEAREST_INT));
+            vFloat = FMUL(vFloat, VIMMED1(1.0f / scale));
+
+            // TODO: Handle INF, NAN, overflow / underflow, etc.
+            // NOTE(review): the shift count computed below is only within
+            // [0, 31] for a limited exponent range (0.0f, for instance, falls
+            // outside it) - confirm how ASHR behaves for those lanes.
+
+            // Extract the 23-bit mantissa, restore the implicit leading one and
+            // apply the sign of the input.
+            Value* vSgn      = FCMP_OLT(vFloat, VIMMED1(0.0f));
+            Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
+            Value* vFixed    = AND(vFloatInt, VIMMED1((1 << 23) - 1));
+            vFixed           = OR(vFixed, VIMMED1(1 << 23));
+            vFixed           = SELECT(vSgn, NEG(vFixed), vFixed);
+
+            // Drop the sign bit, isolate the 8 exponent bits and remove the bias
+            Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
+            vExp        = SUB(vExp, VIMMED1(127));
+
+            // The mantissa carries 23 fractional bits; shift out the ones the
+            // requested format does not keep.
+            Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
+
+            fixed = ASHR(vFixed, vExtraBits, name);
+        }
+
+        return fixed;
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Convert a SIMD vector of signed fixed-point values to floats
+    /// @param vFixed - fixed-point values to convert
+    /// @param numIntBits - number of integer bits in the fixed-point format
+    /// @param numFracBits - number of fractional bits in the fixed-point format
+    /// @param name - name given to the generated IR values
+    /// @return float lanes holding integer part + fractional part
+    Value* Builder::VCVT_FIXED_SI_F32(Value*             vFixed,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+        uint32_t extraBits = 32 - numIntBits - numFracBits;
+        if (numIntBits && extraBits)
+        {
+            // Sign extend
+            Value* shftAmt = VIMMED1(extraBits);
+            vFixed         = ASHR(SHL(vFixed, shftAmt), shftAmt);
+        }
+
+        Value* fVal  = VIMMED1(0.0f);
+        Value* fFrac = VIMMED1(0.0f);
+        if (numIntBits)
+        {
+            // The arithmetic shift keeps the sign of the integer part
+            fVal = SI_TO_FP(ASHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
+        }
+
+        if (numFracBits)
+        {
+            // 64-bit shifts keep the mask and the divisor well defined for
+            // numFracBits >= 31, where (1 << numFracBits) overflows an int.
+            const uint32_t fracMask = uint32_t((1ull << numFracBits) - 1);
+            fFrac = UI_TO_FP(AND(vFixed, VIMMED1(fracMask)), mSimdFP32Ty);
+            fFrac = FDIV(fFrac, VIMMED1(float(1ull << numFracBits)), name);
+        }
+
+        return FADD(fVal, fFrac, name);
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Convert a SIMD vector of floats to unsigned fixed-point
+    /// @param vFloat - float values to convert
+    /// @param numIntBits - number of integer bits in the fixed-point format
+    /// @param numFracBits - number of fractional bits in the fixed-point format
+    /// @param name - IR value name; only used by the disabled manual path
+    /// @return 32-bit integer lanes holding the fixed-point values
+    Value* Builder::VCVT_F32_FIXED_UI(Value*             vFloat,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+
+        // Build the scale from a 64-bit shift: (1 << numFracBits) on a plain
+        // int overflows once numFracBits reaches 31.
+        const float scale = float(1ull << numFracBits);
+
+        Value* fixed = nullptr;
+        if constexpr (true) // KNOB_SIM_FAST_MATH?  Below works correctly from a precision
+                            // standpoint...
+        {
+            fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(scale)), C(_MM_FROUND_TO_NEAREST_INT)),
+                             mSimdInt32Ty);
+        }
+        else
+        {
+            // Do round to nearest int on fractional bits first
+            vFloat = VROUND(FMUL(vFloat, VIMMED1(scale)), C(_MM_FROUND_TO_NEAREST_INT));
+            vFloat = FMUL(vFloat, VIMMED1(1.0f / scale));
+
+            // TODO: Handle INF, NAN, overflow / underflow, etc.
+
+            // Extract the 23-bit mantissa and restore the implicit leading one
+            Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
+            Value* vFixed    = AND(vFloatInt, VIMMED1((1 << 23) - 1));
+            vFixed           = OR(vFixed, VIMMED1(1 << 23));
+
+            // Drop the sign bit, isolate the 8 exponent bits and remove the bias
+            Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
+            vExp        = SUB(vExp, VIMMED1(127));
+
+            Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
+
+            fixed = LSHR(vFixed, vExtraBits, name);
+        }
+
+        return fixed;
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Convert a SIMD vector of unsigned fixed-point values to floats
+    /// @param vFixed - fixed-point values to convert
+    /// @param numIntBits - number of integer bits in the fixed-point format
+    /// @param numFracBits - number of fractional bits in the fixed-point format
+    /// @param name - name given to the generated IR values
+    /// @return float lanes holding integer part + fractional part
+    Value* Builder::VCVT_FIXED_UI_F32(Value*             vFixed,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+        uint32_t extraBits = 32 - numIntBits - numFracBits;
+        if (numIntBits && extraBits)
+        {
+            // Zero extend: the value is unsigned, so the bits above the
+            // fixed-point field must be cleared rather than filled with copies
+            // of its top bit.
+            Value* shftAmt = VIMMED1(extraBits);
+            vFixed         = LSHR(SHL(vFixed, shftAmt), shftAmt);
+        }
+
+        Value* fVal  = VIMMED1(0.0f);
+        Value* fFrac = VIMMED1(0.0f);
+        if (numIntBits)
+        {
+            fVal = UI_TO_FP(LSHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
+        }
+
+        if (numFracBits)
+        {
+            // 64-bit shifts keep the mask and the divisor well defined for
+            // numFracBits >= 31, where (1 << numFracBits) overflows an int.
+            const uint32_t fracMask = uint32_t((1ull << numFracBits) - 1);
+            fFrac = UI_TO_FP(AND(vFixed, VIMMED1(fracMask)), mSimdFP32Ty);
+            fFrac = FDIV(fFrac, VIMMED1(float(1ull << numFracBits)), name);
+        }
+
+        return FADD(fVal, fFrac, name);
+    }
+
+    //////////////////////////////////////////////////////////////////////////
     /// @brief C functions called by LLVM IR
     //////////////////////////////////////////////////////////////////////////
 
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index f8701f9ba84..91e2a32f1a1 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -123,6 +123,32 @@ Value* VMASK_16(Value* mask);
 Value* VMOVMSK(Value* mask);
 
 //////////////////////////////////////////////////////////////////////////
+/// @brief Float / Fixed-point conversions
+///
+/// vFloat / vFixed are SIMD vectors with 32-bit lanes. numIntBits and
+/// numFracBits describe the fixed-point format and must not add up to more
+/// than 32. name is an optional name for the generated IR value(s).
+//////////////////////////////////////////////////////////////////////////
+// Signed
+Value* VCVT_F32_FIXED_SI(Value*             vFloat,
+                         uint32_t           numIntBits,
+                         uint32_t           numFracBits,
+                         const llvm::Twine& name = "");
+Value* VCVT_FIXED_SI_F32(Value*             vFixed,
+                         uint32_t           numIntBits,
+                         uint32_t           numFracBits,
+                         const llvm::Twine& name = "");
+// Unsigned
+Value* VCVT_F32_FIXED_UI(Value*             vFloat,
+                         uint32_t           numIntBits,
+                         uint32_t           numFracBits,
+                         const llvm::Twine& name = "");
+Value* VCVT_FIXED_UI_F32(Value*             vFixed,
+                         uint32_t           numIntBits,
+                         uint32_t           numFracBits,
+                         const llvm::Twine& name = "");
+
+//////////////////////////////////////////////////////////////////////////
 /// @brief functions that build IR to call x86 intrinsics directly, or
 /// emulate them with other instructions if not available on the host
 //////////////////////////////////////////////////////////////////////////