From f9b1e5241facc8cf255c258082d5cb5b04783e93 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 4 Mar 2003 16:33:53 +0000 Subject: added _mesa_inv_sqrtf() and INV_SQRTF() (Josh Vanderhoof) --- src/mesa/main/imports.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) (limited to 'src/mesa/main/imports.c') diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index e9d579e7640..8474ed4abcf 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -1,4 +1,4 @@ -/* $Id: imports.c,v 1.32 2003/03/01 01:50:21 brianp Exp $ */ +/* $Id: imports.c,v 1.33 2003/03/04 16:33:53 brianp Exp $ */ /* * Mesa 3-D graphics library @@ -346,6 +346,7 @@ _mesa_sqrtf( float x ) * then reconstruct the result back into a float */ num.i = ((sqrttab[num.i >> 16]) << 16) | ((e + 127) << 23); + return num.f; #else return (float) _mesa_sqrtd((double) x); @@ -353,6 +354,115 @@ _mesa_sqrtf( float x ) } +/** + inv_sqrt - A single precision 1/sqrt routine for IEEE format floats. + written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk + and Vesa Karvonen. +*/ +float +_mesa_inv_sqrtf(float n) +{ +#if defined(USE_IEEE) && !defined(DEBUG) + float r0, x0, y0; + float r1, x1, y1; + float r2, x2, y2; +#if 0 /* not used, see below -BP */ + float r3, x3, y3; +#endif + union { float f; unsigned int i; } u; + unsigned int magic; + + /* + Exponent part of the magic number - + + We want to: + 1. subtract the bias from the exponent, + 2. negate it + 3. divide by two (rounding towards -inf) + 4. add the bias back + + Which is the same as subtracting the exponent from 381 and dividing + by 2. + + floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2) + */ + + magic = 381 << 23; + + /* + Significand part of magic number - + + With the current magic number, "(magic - u.i) >> 1" will give you: + + for 1 <= u.f <= 2: 1.25 - u.f / 4 + for 2 <= u.f <= 4: 1.00 - u.f / 8 + + This isn't a bad approximation of 1/sqrt. The maximum difference from + 1/sqrt will be around .06. After three Newton-Raphson iterations, the + maximum difference is less than 4.5e-8. (Which is actually close + enough to make the following bias academic...) + + To get a better approximation you can add a bias to the magic + number. For example, if you subtract 1/2 of the maximum difference in + the first approximation (.03), you will get the following function: + + for 1 <= u.f <= 2: 1.22 - u.f / 4 + for 2 <= u.f <= 3.76: 0.97 - u.f / 8 + for 3.76 <= u.f <= 4: 0.72 - u.f / 16 + (The 3.76 to 4 range is where the result is < .5.) + + This is the closest possible initial approximation, but with a maximum + error of 8e-11 after three NR iterations, it is still not perfect. If + you subtract 0.0332281 instead of .03, the maximum error will be + 2.5e-11 after three NR iterations, which should be about as close as + is possible. + + for 1 <= u.f <= 2: 1.2167719 - u.f / 4 + for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8 + for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16 + + */ + + magic -= (int)(0.0332281 * (1 << 25)); + + u.f = n; + u.i = (magic - u.i) >> 1; + + /* + Instead of Newton-Raphson, we use Goldschmidt's algorithm, which + allows more parallelism. From what I understand, the parallelism + comes at the cost of less precision, because it lets error + accumulate across iterations. + */ + x0 = 1.0f; + y0 = 0.5f * n; + r0 = u.f; + + x1 = x0 * r0; + y1 = y0 * r0 * r0; + r1 = 1.5f - y1; + + x2 = x1 * r1; + y2 = y1 * r1 * r1; + r2 = 1.5f - y2; + +#if 1 + return x2 * r2; /* we can stop here, and be conformant -BP */ +#else + x3 = x2 * r2; + y3 = y2 * r2 * r2; + r3 = 1.5f - y3; + + return x3 * r3; +#endif +#elif defined(XFree86LOADER) && defined(IN_MODULE) + return 1.0F / xf86sqrt(n); +#else + return 1.0F / sqrt(n); +#endif +} + + double _mesa_pow(double x, double y) { -- cgit v1.2.3