diff options
author | Matt Turner <[email protected]> | 2015-02-20 20:27:06 -0800 |
---|---|---|
committer | Matt Turner <[email protected]> | 2015-02-23 10:49:48 -0800 |
commit | bb2a897dbc8064ccd38ecbf3f484abc0b900a862 (patch) | |
tree | 3c59cb1c5a6c30505406351ae7f4ce9cfedff05d | |
parent | 08bc7cf8f641812a48e9d9a416611795e2736489 (diff) |
mesa: Move START/END_FAST_MATH macros to their only use.
Reviewed-by: Eric Anholt <[email protected]>
-rw-r--r-- | src/mesa/main/compiler.h | 79 | ||||
-rw-r--r-- | src/mesa/tnl/t_pipeline.c | 78 |
2 files changed, 78 insertions, 79 deletions
diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index d513284bb68..0884742cc1b 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -240,85 +240,6 @@ static inline GLuint CPU_TO_LE32(GLuint x) #define IEEE_ONE 0x3f800000 -/** - * START/END_FAST_MATH macros: - * - * START_FAST_MATH: Set x86 FPU to faster, 32-bit precision mode (and save - * original mode to a temporary). - * END_FAST_MATH: Restore x86 FPU to original mode. - */ -#if defined(__GNUC__) && defined(__i386__) -/* - * Set the x86 FPU control word to guarentee only 32 bits of precision - * are stored in registers. Allowing the FPU to store more introduces - * differences between situations where numbers are pulled out of memory - * vs. situations where the compiler is able to optimize register usage. - * - * In the worst case, we force the compiler to use a memory access to - * truncate the float, by specifying the 'volatile' keyword. - */ -/* Hardware default: All exceptions masked, extended double precision, - * round to nearest (IEEE compliant): - */ -#define DEFAULT_X86_FPU 0x037f -/* All exceptions masked, single precision, round to nearest: - */ -#define FAST_X86_FPU 0x003f -/* The fldcw instruction will cause any pending FP exceptions to be - * raised prior to entering the block, and we clear any pending - * exceptions before exiting the block. Hence, asm code has free - * reign over the FPU while in the fast math block. - */ -#if defined(NO_FAST_MATH) -#define START_FAST_MATH(x) \ -do { \ - static GLuint mask = DEFAULT_X86_FPU; \ - __asm__ ( "fnstcw %0" : "=m" (*&(x)) ); \ - __asm__ ( "fldcw %0" : : "m" (mask) ); \ -} while (0) -#else -#define START_FAST_MATH(x) \ -do { \ - static GLuint mask = FAST_X86_FPU; \ - __asm__ ( "fnstcw %0" : "=m" (*&(x)) ); \ - __asm__ ( "fldcw %0" : : "m" (mask) ); \ -} while (0) -#endif -/* Restore original FPU mode, and clear any exceptions that may have - * occurred in the FAST_MATH block. - */ -#define END_FAST_MATH(x) \ -do { \ - __asm__ ( "fnclex ; fldcw %0" : : "m" (*&(x)) ); \ -} while (0) - -#elif defined(_MSC_VER) && defined(_M_IX86) -#define DEFAULT_X86_FPU 0x037f /* See GCC comments above */ -#define FAST_X86_FPU 0x003f /* See GCC comments above */ -#if defined(NO_FAST_MATH) -#define START_FAST_MATH(x) do {\ - static GLuint mask = DEFAULT_X86_FPU;\ - __asm fnstcw word ptr [x]\ - __asm fldcw word ptr [mask]\ -} while(0) -#else -#define START_FAST_MATH(x) do {\ - static GLuint mask = FAST_X86_FPU;\ - __asm fnstcw word ptr [x]\ - __asm fldcw word ptr [mask]\ -} while(0) -#endif -#define END_FAST_MATH(x) do {\ - __asm fnclex\ - __asm fldcw word ptr [x]\ -} while(0) - -#else -#define START_FAST_MATH(x) x = 0 -#define END_FAST_MATH(x) (void)(x) -#endif - - #ifndef Elements #define Elements(x) (sizeof(x)/sizeof(*(x))) #endif diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c index 78508aecf8c..0adbef01264 100644 --- a/src/mesa/tnl/t_pipeline.c +++ b/src/mesa/tnl/t_pipeline.c @@ -112,6 +112,84 @@ static GLuint check_output_changes( struct gl_context *ctx ) #endif } +/** + * START/END_FAST_MATH macros: + * + * START_FAST_MATH: Set x86 FPU to faster, 32-bit precision mode (and save + * original mode to a temporary). + * END_FAST_MATH: Restore x86 FPU to original mode. + */ +#if defined(__GNUC__) && defined(__i386__) +/* + * Set the x86 FPU control word to guarentee only 32 bits of precision + * are stored in registers. Allowing the FPU to store more introduces + * differences between situations where numbers are pulled out of memory + * vs. situations where the compiler is able to optimize register usage. + * + * In the worst case, we force the compiler to use a memory access to + * truncate the float, by specifying the 'volatile' keyword. + */ +/* Hardware default: All exceptions masked, extended double precision, + * round to nearest (IEEE compliant): + */ +#define DEFAULT_X86_FPU 0x037f +/* All exceptions masked, single precision, round to nearest: + */ +#define FAST_X86_FPU 0x003f +/* The fldcw instruction will cause any pending FP exceptions to be + * raised prior to entering the block, and we clear any pending + * exceptions before exiting the block. Hence, asm code has free + * reign over the FPU while in the fast math block. + */ +#if defined(NO_FAST_MATH) +#define START_FAST_MATH(x) \ +do { \ + static GLuint mask = DEFAULT_X86_FPU; \ + __asm__ ( "fnstcw %0" : "=m" (*&(x)) ); \ + __asm__ ( "fldcw %0" : : "m" (mask) ); \ +} while (0) +#else +#define START_FAST_MATH(x) \ +do { \ + static GLuint mask = FAST_X86_FPU; \ + __asm__ ( "fnstcw %0" : "=m" (*&(x)) ); \ + __asm__ ( "fldcw %0" : : "m" (mask) ); \ +} while (0) +#endif +/* Restore original FPU mode, and clear any exceptions that may have + * occurred in the FAST_MATH block. + */ +#define END_FAST_MATH(x) \ +do { \ + __asm__ ( "fnclex ; fldcw %0" : : "m" (*&(x)) ); \ +} while (0) + +#elif defined(_MSC_VER) && defined(_M_IX86) +#define DEFAULT_X86_FPU 0x037f /* See GCC comments above */ +#define FAST_X86_FPU 0x003f /* See GCC comments above */ +#if defined(NO_FAST_MATH) +#define START_FAST_MATH(x) do {\ + static GLuint mask = DEFAULT_X86_FPU;\ + __asm fnstcw word ptr [x]\ + __asm fldcw word ptr [mask]\ +} while(0) +#else +#define START_FAST_MATH(x) do {\ + static GLuint mask = FAST_X86_FPU;\ + __asm fnstcw word ptr [x]\ + __asm fldcw word ptr [mask]\ +} while(0) +#endif +#define END_FAST_MATH(x) do {\ + __asm fnclex\ + __asm fldcw word ptr [x]\ +} while(0) + +#else +#define START_FAST_MATH(x) x = 0 +#define END_FAST_MATH(x) (void)(x) +#endif + void _tnl_run_pipeline( struct gl_context *ctx ) { |