1 files changed, 42 insertions, 45 deletions
diff --git a/src/gallium/auxiliary/util/u_format_rgb9e5.h b/src/gallium/auxiliary/util/u_format_rgb9e5.h
index d11bfa833f1..21feba7b710 100644
--- a/src/gallium/auxiliary/util/u_format_rgb9e5.h
+++ b/src/gallium/auxiliary/util/u_format_rgb9e5.h
@@ -74,62 +74,59 @@ typedef union {
    } field;
 } rgb9e5;
 
-static inline float rgb9e5_ClampRange(float x)
-{
-   if (x > 0.0f) {
-      if (x >= MAX_RGB9E5) {
-         return MAX_RGB9E5;
-      } else {
-         return x;
-      }
-   } else {
-      /* NaN gets here too since comparisons with NaN always fail! */
-      return 0.0f;
-   }
-}
 
-/* Ok, FloorLog2 is not correct for the denorm and zero values, but we
-   are going to do a max of this value with the minimum rgb9e5 exponent
-   that will hide these problem cases. */
-static inline int rgb9e5_FloorLog2(float x)
+static inline int rgb9e5_ClampRange(float x)
 {
    float754 f;
-
+   float754 max;
    f.value = x;
-   return (f.field.biasedexponent - 127);
+   max.value = MAX_RGB9E5;
+
+   if (f.raw > 0x7f800000)
+  /* catches neg, NaNs */
+      return 0;
+   else if (f.raw >= max.raw)
+      return max.raw;
+   else
+      return f.raw;
 }
 
 static inline unsigned float3_to_rgb9e5(const float rgb[3])
 {
    rgb9e5 retval;
-   float maxrgb;
-   int rm, gm, bm;
-   float rc, gc, bc;
-   int exp_shared, maxm;
+   int rm, gm, bm, exp_shared;
    float754 revdenom = {0};
-
-   rc = rgb9e5_ClampRange(rgb[0]);
-   gc = rgb9e5_ClampRange(rgb[1]);
-   bc = rgb9e5_ClampRange(rgb[2]);
-
-   maxrgb = MAX3(rc, gc, bc);
-   exp_shared = MAX2(-RGB9E5_EXP_BIAS - 1, rgb9e5_FloorLog2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
+   float754 rc, bc, gc, maxrgb;
+
+   rc.raw = rgb9e5_ClampRange(rgb[0]);
+   gc.raw = rgb9e5_ClampRange(rgb[1]);
+   bc.raw = rgb9e5_ClampRange(rgb[2]);
+   maxrgb.raw = MAX3(rc.raw, gc.raw, bc.raw);
+
+   /*
+    * Compared to what the spec suggests, instead of conditionally adjusting
+    * the exponent after the fact do it here by doing the equivalent of +0.5 -
+    * the int add will spill over into the exponent in this case.
+    */
+   maxrgb.raw += maxrgb.raw & (1 << (23-9));
+   exp_shared = MAX2((maxrgb.raw >> 23), -RGB9E5_EXP_BIAS - 1 + 127) +
+                1 + RGB9E5_EXP_BIAS - 127;
+   revdenom.field.biasedexponent = 127 - (exp_shared - RGB9E5_EXP_BIAS -
+                                          RGB9E5_MANTISSA_BITS) + 1;
    assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
-   assert(exp_shared >= 0);
-   revdenom.field.biasedexponent = 127 - (exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS);
-
-   maxm = (int) (maxrgb * revdenom.value + 0.5);
-   if (maxm == MAX_RGB9E5_MANTISSA + 1) {
-      revdenom.value *= 0.5f;
-      exp_shared += 1;
-      assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
-   } else {
-      assert(maxm <= MAX_RGB9E5_MANTISSA);
-   }
-
-   rm = (int) (rc * revdenom.value + 0.5);
-   gm = (int) (gc * revdenom.value + 0.5);
-   bm = (int) (bc * revdenom.value + 0.5);
+
+   /*
+    * The spec uses strict round-up behavior (d3d10 disagrees, but in any case
+    * must match what is done above for figuring out exponent).
+    * We avoid the doubles ((int) rc * revdenom + 0.5) by doing the rounding
+    * ourselves (revdenom was adjusted by +1, above).
+    */
+   rm = (int) (rc.value * revdenom.value);
+   gm = (int) (gc.value * revdenom.value);
+   bm = (int) (bc.value * revdenom.value);
+   rm = (rm & 1) + (rm >> 1);
+   gm = (gm & 1) + (gm >> 1);
+   bm = (bm & 1) + (bm >> 1);
 
    assert(rm <= MAX_RGB9E5_MANTISSA);
    assert(gm <= MAX_RGB9E5_MANTISSA);