aboutsummaryrefslogtreecommitdiffstats
path: root/core/fpu_ctrl.cpp
diff options
context:
space:
mode:
author Chris Robinson <[email protected]> 2023-10-05 16:20:44 -0700
committer Chris Robinson <[email protected]> 2023-10-05 16:20:44 -0700
commit 28ebc90521ca36fce35ea3e298938f2b35a8d79d (patch)
tree c2f0129ea62e273aef100f55759593957f589776 /core/fpu_ctrl.cpp
parent 23cc00ea16bdfbb06ae49cde0e05db6ec4a07100 (diff)
Avoid inline assembly for getting/setting SSE state
Diffstat (limited to 'core/fpu_ctrl.cpp')
-rw-r--r-- core/fpu_ctrl.cpp 69
1 files changed, 49 insertions, 20 deletions
diff --git a/core/fpu_ctrl.cpp b/core/fpu_ctrl.cpp
index 0cf0d6e7..701f517d 100644
--- a/core/fpu_ctrl.cpp
+++ b/core/fpu_ctrl.cpp
@@ -8,38 +8,69 @@
#endif
#ifdef HAVE_SSE_INTRINSICS
#include <emmintrin.h>
-#ifndef _MM_DENORMALS_ZERO_MASK
+#elif defined(HAVE_SSE)
+#include <xmmintrin.h>
+#endif
+
+#if defined(HAVE_SSE) && !defined(_MM_DENORMALS_ZERO_MASK)
/* Some headers seem to be missing these? */
#define _MM_DENORMALS_ZERO_MASK 0x0040u
#define _MM_DENORMALS_ZERO_ON 0x0040u
#endif
-#endif
#include "cpu_caps.h"
+namespace {
-void FPUCtl::enter() noexcept
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+[[gnu::target("sse")]]
+#endif
+void disable_denormals(unsigned int *state [[maybe_unused]])
{
- if(this->in_mode) return;
-
#if defined(HAVE_SSE_INTRINSICS)
- this->sse_state = _mm_getcsr();
- unsigned int sseState{this->sse_state};
+ *state = _mm_getcsr();
+ unsigned int sseState{*state};
sseState &= ~(_MM_FLUSH_ZERO_MASK | _MM_DENORMALS_ZERO_MASK);
sseState |= _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON;
_mm_setcsr(sseState);
-#elif defined(__GNUC__) && defined(HAVE_SSE)
+#elif defined(HAVE_SSE)
- if((CPUCapFlags&CPU_CAP_SSE))
+ *state = _mm_getcsr();
+ unsigned int sseState{*state};
+ sseState &= ~_MM_FLUSH_ZERO_MASK;
+ sseState |= _MM_FLUSH_ZERO_ON;
+ if((CPUCapFlags&CPU_CAP_SSE2))
{
- __asm__ __volatile__("stmxcsr %0" : "=m" (*&this->sse_state));
- unsigned int sseState{this->sse_state};
- sseState |= 0x8000; /* set flush-to-zero */
- if((CPUCapFlags&CPU_CAP_SSE2))
- sseState |= 0x0040; /* set denormals-are-zero */
- __asm__ __volatile__("ldmxcsr %0" : : "m" (*&sseState));
+ sseState &= ~_MM_DENORMALS_ZERO_MASK;
+ sseState |= _MM_DENORMALS_ZERO_ON;
}
+ _mm_setcsr(sseState);
+#endif
+}
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+[[gnu::target("sse")]]
+#endif
+void reset_fpu(unsigned int state [[maybe_unused]])
+{
+#if defined(HAVE_SSE_INTRINSICS) || defined(HAVE_SSE)
+ _mm_setcsr(state);
+#endif
+}
+
+} // namespace
+
+
+void FPUCtl::enter() noexcept
+{
+ if(this->in_mode) return;
+
+#if defined(HAVE_SSE_INTRINSICS)
+ disable_denormals(&this->sse_state);
+#elif defined(HAVE_SSE)
+ if((CPUCapFlags&CPU_CAP_SSE))
+ disable_denormals(&this->sse_state);
#endif
this->in_mode = true;
@@ -50,12 +81,10 @@ void FPUCtl::leave() noexcept
if(!this->in_mode) return;
#if defined(HAVE_SSE_INTRINSICS)
- _mm_setcsr(this->sse_state);
-
-#elif defined(__GNUC__) && defined(HAVE_SSE)
-
+ reset_fpu(this->sse_state);
+#elif defined(HAVE_SSE)
if((CPUCapFlags&CPU_CAP_SSE))
- __asm__ __volatile__("ldmxcsr %0" : : "m" (*&this->sse_state));
+ reset_fpu(this->sse_state);
#endif
this->in_mode = false;
}