summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlok Hota <[email protected]>2018-08-23 18:42:25 -0500
committerAlok Hota <[email protected]>2019-01-16 13:53:30 -0600
commit9459863dfa16071ff5088a15d853028f2865c4a7 (patch)
treec7473f02d044096d811a8e26b8fc1a888210cb45
parent9cacf9d8772fe8efd8bdded902b1b2f9b2c6e1cd (diff)
swr/rast: partial support for Tiled Resources
- updated sample from TRTT surfaces correctly
- implemented mapped status return for TRTT surfaces
- implemented per-sample instruction minLod clamp
- updated bilinear filter weight calculation to be closer to D3D specs
- implemented "ReducedTexcoordRange" operation from D3D specs to avoid loss of precision on high-value normalized coordinates
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp142
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h22
2 files changed, 164 insertions, 0 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index 26d8688f5e9..65eec4e4c68 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -765,6 +765,148 @@ namespace SwrJit
Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); }
//////////////////////////////////////////////////////////////////////////
+ /// @brief Float / Fixed-point conversions
+ //////////////////////////////////////////////////////////////////////////
+    /// @brief Emit IR that converts a SIMD vector of floats to signed fixed-point.
+    ///
+    /// Each lane is first rounded to the nearest multiple of 2^-numFracBits. The
+    /// integer result is then assembled from the IEEE-754 mantissa and exponent
+    /// fields of the rounded value, negating the mantissa for negative lanes.
+    ///
+    /// @param vFloat      - float vector to convert
+    /// @param numIntBits  - number of integer bits (only used by the assert here)
+    /// @param numFracBits - number of fractional bits in the result
+    /// @param name        - name forwarded to the final shift instruction
+    /// @return i32 vector holding the fixed-point values
+    ///
+    /// @note INF, NAN and overflow / underflow are not handled (see TODO below).
+    /// NOTE(review): for a 0.0f lane the exponent field is 0, so the computed
+    /// shift count is (150 - numFracBits), which is >= 32 for typical formats;
+    /// LLVM defines such a shift as poison - confirm what callers rely on.
+    Value* Builder::VCVT_F32_FIXED_SI(Value*             vFloat,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+
+        // 2^numFracBits. Use a 64-bit shift: `1 << numFracBits` on a plain int
+        // overflows to a negative value at 31 and is undefined at 32.
+        const float fracScale = float(1ull << numFracBits);
+
+        Value* fixed = nullptr;
+        if constexpr (false) // This doesn't work for negative numbers!!
+        {
+            fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(fracScale)), C(_MM_FROUND_TO_NEAREST_INT)),
+                             mSimdInt32Ty);
+        }
+        else
+        {
+            // Do round to nearest int on fractional bits first
+            // Not entirely perfect for negative numbers, but close enough
+            vFloat = VROUND(FMUL(vFloat, VIMMED1(fracScale)), C(_MM_FROUND_TO_NEAREST_INT));
+            vFloat = FMUL(vFloat, VIMMED1(1.0f / fracScale));
+
+            // TODO: Handle INF, NAN, overflow / underflow, etc.
+
+            // Rebuild the 24-bit significand (23 stored bits + implicit leading 1)
+            // and apply the sign as a two's complement negate.
+            Value* vSgn      = FCMP_OLT(vFloat, VIMMED1(0.0f));
+            Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
+            Value* vFixed    = AND(vFloatInt, VIMMED1((1 << 23) - 1));
+            vFixed           = OR(vFixed, VIMMED1(1 << 23));
+            vFixed           = SELECT(vSgn, NEG(vFixed), vFixed);
+
+            // Shift the sign bit out, then bring the 8 exponent bits down and
+            // remove the IEEE-754 bias.
+            Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
+            vExp        = SUB(vExp, VIMMED1(127));
+
+            // The significand carries 23 fractional bits at exponent 0; drop the
+            // ones that exceed the requested fixed-point precision.
+            Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
+
+            fixed = ASHR(vFixed, vExtraBits, name);
+        }
+
+        return fixed;
+    }
+
+    /// @brief Emit IR that converts a SIMD vector of signed fixed-point values to float.
+    ///
+    /// The integer part (arithmetic shift right by numFracBits) and the fractional
+    /// part (low numFracBits bits divided by 2^numFracBits) are converted
+    /// separately and then added.
+    ///
+    /// @param vFixed      - i32 vector holding the fixed-point values
+    /// @param numIntBits  - number of integer bits; when 0 the integer part is 0.0
+    /// @param numFracBits - number of fractional bits; when 0 the fraction is 0.0
+    /// @param name        - name forwarded to the emitted conversion / add instructions
+    /// @return float vector with the converted values
+    Value* Builder::VCVT_FIXED_SI_F32(Value*             vFixed,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+
+        const uint32_t extraBits = 32 - numIntBits - numFracBits;
+        if (numIntBits && extraBits)
+        {
+            // Sign extend: move the value's top bit into bit 31, then shift it
+            // back down arithmetically.
+            Value* shftAmt = VIMMED1(extraBits);
+            vFixed         = ASHR(SHL(vFixed, shftAmt), shftAmt);
+        }
+
+        Value* fVal  = VIMMED1(0.0f);
+        Value* fFrac = VIMMED1(0.0f);
+        if (numIntBits)
+        {
+            fVal = SI_TO_FP(ASHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
+        }
+
+        if (numFracBits)
+        {
+            // Build mask and scale with unsigned / 64-bit arithmetic:
+            // `1 << numFracBits` on a plain int overflows at 31 (negative scale)
+            // and is undefined at 32.
+            const uint32_t fracMask =
+                (numFracBits >= 32) ? 0xFFFFFFFFu : ((1u << numFracBits) - 1u);
+            const float fracScale = float(1ull << numFracBits);
+
+            // The fraction is always non-negative in two's complement, so it is
+            // converted as unsigned and added to the floor-style integer part.
+            fFrac = UI_TO_FP(AND(vFixed, VIMMED1(fracMask)), mSimdFP32Ty);
+            fFrac = FDIV(fFrac, VIMMED1(fracScale), name);
+        }
+
+        return FADD(fVal, fFrac, name);
+    }
+
+    /// @brief Emit IR that converts a SIMD vector of floats to unsigned fixed-point.
+    ///
+    /// The active path scales by 2^numFracBits, rounds to nearest and converts
+    /// with FP_TO_UI. The disabled path builds the result from the IEEE-754
+    /// mantissa and exponent fields instead.
+    ///
+    /// @param vFloat      - float vector to convert
+    /// @param numIntBits  - number of integer bits (only used by the assert here)
+    /// @param numFracBits - number of fractional bits in the result
+    /// @param name        - name forwarded to the final shift of the disabled path
+    /// @return i32 vector holding the fixed-point values
+    ///
+    /// NOTE(review): lanes reach FP_TO_UI unclamped; negative or out-of-range
+    /// inputs are presumably excluded by callers - confirm.
+    Value* Builder::VCVT_F32_FIXED_UI(Value*             vFloat,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+
+        // 2^numFracBits. Use a 64-bit shift: `1 << numFracBits` on a plain int
+        // overflows to a negative value at 31 and is undefined at 32 (a 0.32
+        // format is allowed by the assert above).
+        const float fracScale = float(1ull << numFracBits);
+
+        Value* fixed = nullptr;
+        if constexpr (true) // KNOB_SIM_FAST_MATH?  Below works correctly from a precision
+                            // standpoint...
+        {
+            fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(fracScale)), C(_MM_FROUND_TO_NEAREST_INT)),
+                             mSimdInt32Ty);
+        }
+        else
+        {
+            // Do round to nearest int on fractional bits first
+            vFloat = VROUND(FMUL(vFloat, VIMMED1(fracScale)), C(_MM_FROUND_TO_NEAREST_INT));
+            vFloat = FMUL(vFloat, VIMMED1(1.0f / fracScale));
+
+            // TODO: Handle INF, NAN, overflow / underflow, etc.
+
+            // Rebuild the 24-bit significand (23 stored bits + implicit leading 1).
+            Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
+            Value* vFixed    = AND(vFloatInt, VIMMED1((1 << 23) - 1));
+            vFixed           = OR(vFixed, VIMMED1(1 << 23));
+
+            // Shift the sign bit out, then bring the 8 exponent bits down and
+            // remove the IEEE-754 bias.
+            Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
+            vExp        = SUB(vExp, VIMMED1(127));
+
+            // Drop the significand bits beyond the requested precision.
+            Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
+
+            fixed = LSHR(vFixed, vExtraBits, name);
+        }
+
+        return fixed;
+    }
+
+    /// @brief Emit IR that converts a SIMD vector of unsigned fixed-point values to float.
+    ///
+    /// Bits above (numIntBits + numFracBits) are cleared, then the integer part
+    /// (logical shift right by numFracBits) and the fractional part (low
+    /// numFracBits bits divided by 2^numFracBits) are converted and added.
+    ///
+    /// @param vFixed      - i32 vector holding the fixed-point values
+    /// @param numIntBits  - number of integer bits; when 0 the integer part is 0.0
+    /// @param numFracBits - number of fractional bits; when 0 the fraction is 0.0
+    /// @param name        - name forwarded to the emitted conversion / add instructions
+    /// @return float vector with the converted values
+    Value* Builder::VCVT_FIXED_UI_F32(Value*             vFixed,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+
+        const uint32_t extraBits = 32 - numIntBits - numFracBits;
+        if (numIntBits && extraBits)
+        {
+            // Zero extend: an unsigned value has no sign bit, so the unused upper
+            // bits must be cleared with a logical shift. An arithmetic shift
+            // would replicate the value's top bit into them and inflate the
+            // integer part extracted below.
+            Value* shftAmt = VIMMED1(extraBits);
+            vFixed         = LSHR(SHL(vFixed, shftAmt), shftAmt);
+        }
+
+        Value* fVal  = VIMMED1(0.0f);
+        Value* fFrac = VIMMED1(0.0f);
+        if (numIntBits)
+        {
+            fVal = UI_TO_FP(LSHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
+        }
+
+        if (numFracBits)
+        {
+            // Build mask and scale with unsigned / 64-bit arithmetic:
+            // `1 << numFracBits` on a plain int overflows at 31 (negative scale)
+            // and is undefined at 32 (a 0.32 format is allowed by the assert).
+            const uint32_t fracMask =
+                (numFracBits >= 32) ? 0xFFFFFFFFu : ((1u << numFracBits) - 1u);
+            const float fracScale = float(1ull << numFracBits);
+
+            fFrac = UI_TO_FP(AND(vFixed, VIMMED1(fracMask)), mSimdFP32Ty);
+            fFrac = FDIV(fFrac, VIMMED1(fracScale), name);
+        }
+
+        return FADD(fVal, fFrac, name);
+    }
+
+ //////////////////////////////////////////////////////////////////////////
/// @brief C functions called by LLVM IR
//////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index f8701f9ba84..91e2a32f1a1 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -123,6 +123,28 @@ Value* VMASK_16(Value* mask);
Value* VMOVMSK(Value* mask);
//////////////////////////////////////////////////////////////////////////
+/// @brief Float / Fixed-point conversions
+//////////////////////////////////////////////////////////////////////////
+// Signed: float -> signed fixed-point, then signed fixed-point -> float. Each call emits IR and returns the resulting Value*.
+Value* VCVT_F32_FIXED_SI(Value* vFloat, // float vector to convert
+ uint32_t numIntBits, // number of integer bits; numIntBits + numFracBits must be <= 32
+ uint32_t numFracBits, // number of fractional bits
+ const llvm::Twine& name = ""); // optional name forwarded to the emitted IR
+Value* VCVT_FIXED_SI_F32(Value* vFixed, // i32 fixed-point vector to convert
+ uint32_t numIntBits, // number of integer bits; numIntBits + numFracBits must be <= 32
+ uint32_t numFracBits, // number of fractional bits
+ const llvm::Twine& name = ""); // optional name forwarded to the emitted IR
+// Unsigned: float -> unsigned fixed-point, then unsigned fixed-point -> float. Same parameter meaning as the signed pair.
+Value* VCVT_F32_FIXED_UI(Value* vFloat, // float vector to convert
+ uint32_t numIntBits, // number of integer bits; numIntBits + numFracBits must be <= 32
+ uint32_t numFracBits, // number of fractional bits
+ const llvm::Twine& name = ""); // optional name forwarded to the emitted IR
+Value* VCVT_FIXED_UI_F32(Value* vFixed, // i32 fixed-point vector to convert
+ uint32_t numIntBits, // number of integer bits; numIntBits + numFracBits must be <= 32
+ uint32_t numFracBits, // number of fractional bits
+ const llvm::Twine& name = ""); // optional name forwarded to the emitted IR
+
+//////////////////////////////////////////////////////////////////////////
/// @brief functions that build IR to call x86 intrinsics directly, or
/// emulate them with other instructions if not available on the host
//////////////////////////////////////////////////////////////////////////