aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
authorRoland Scheidegger <[email protected]>2020-03-24 19:54:06 +0100
committerMarge Bot <[email protected]>2020-03-25 19:16:13 +0000
commit3cbcb1b73e5f764ed87fdcd1dea8a921e73bfd82 (patch)
tree2d770ce2b5873ec416117f3f625f9cf1168fc523 /src/gallium/auxiliary
parent9e78f17b74a862e34891901cde8292f91adeb655 (diff)
gallium/util: Add back (and rename) util_float_to_half implementation
This implementation was removed by 8b8af6d3 ("gallium/util: Switch util_float_to_half to _mesa_float_to_half()'s impl."). It was not actually broken, but _mesa_float_to_half() implements round-to-nearest-even, whereas util_float_to_half() implemented round-to-zero, so rename it appropriately. GL never actually cares about rounding (except for a broken piglit test); d3d10, however, very much does, and requires RTZ for float to half conversion. Moreover, apparently at least radeon GPUs always do RTZ when doing RT writes (and I'd suspect for shader image writes as well). Hence it seems appropriate to hook up this rtz function to the format instead. This will cause llvmpipe and softpipe to use rtz rounding for clears with half float formats, and softpipe will use rtz behavior for RT writes as well (llvmpipe has that hardcoded); not sure if "real" hw drivers hit this function much. (For shader opcodes we would still need to figure out which rounding to use, but this is a question for another day.) Note: this should probably be unified with _mesa_float_to_float16_rtz. It is unclear at this point which one is better, so just restore the previous function here. Reviewed-by: Marek Olšák <[email protected]> Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4312> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4312>
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/util/u_half.h57
1 files changed, 57 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
index a107dcb74b1..bbcc843c310 100644
--- a/src/gallium/auxiliary/util/u_half.h
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -50,6 +50,63 @@ util_float_to_half(float f)
return _mesa_float_to_half(f);
}
/**
 * Convert a 32-bit float to IEEE 754 half-float (binary16) bits, rounding
 * toward zero.
 *
 * \param f  value to convert
 * \return   the 16-bit half-float encoding of \p f:
 *           - +/-Inf maps to +/-Inf (0x7c00 with the sign bit),
 *           - any NaN maps to the quiet NaN 0x7e00 with the input's sign bit,
 *           - finite values whose magnitude exceeds the half range are
 *             clamped to the largest finite half (0x7bff), never to Inf,
 *           - everything else is truncated toward zero, producing half
 *             denormals for magnitudes below 2^-14.
 *
 * XXX: The magic multiply relies on float denormals being available,
 * otherwise all f16 denorms get flushed to zero - hence when this is used
 * for tgsi_exec in softpipe we won't get f16 denorms.
 */
static inline uint16_t
util_float_to_half_rtz(float f)
{
   const uint32_t sign_mask = 0x80000000u;
   /*
    * A half stores 10 mantissa bits, which live in bits 13..22 of the float.
    * Clearing bits 0..12 up front discards exactly the bits that do not fit,
    * i.e. truncates toward zero, and it also makes the multiply below exact
    * for every result that can reach bit 13, so the FPU's own rounding mode
    * cannot round a denormal result upward.
    */
   const uint32_t trunc_mask = ~0x1fffu;
   const uint32_t f32inf = 0xffu << 23;
   const uint32_t f16inf = 0x1fu << 23;
   union {
      float f;
      uint32_t ui;
   } magic, f32;
   uint32_t sign;
   uint16_t f16;

   /* 2^(15 - 127): rebias the exponent from the float to the half range. */
   magic.ui = 0xfu << 23;

   f32.f = f;

   /* Strip the sign so the comparisons below work on the magnitude. */
   sign = f32.ui & sign_mask;
   f32.ui ^= sign;

   if (f32.ui == f32inf) {
      /* Inf */
      f16 = 0x7c00;
   } else if (f32.ui > f32inf) {
      /* NaN */
      f16 = 0x7e00;
   } else {
      /* Number */
      f32.ui &= trunc_mask;
      f32.f *= magic.f;

      /*
       * Clamp to max finite value if overflowed.  The comparison has to be
       * inclusive: a result equal to f16inf would otherwise be shifted down
       * to 0x7c00 and turn a finite input into infinity.
       * OpenGL has completely undefined rounding behavior for float to
       * half-float conversions, and this matches what is mandated for float
       * to fp11/fp10, which recommend round-to-nearest-finite too.
       * (d3d10 is deeply unhappy about flushing such values to infinity.)
       */
      if (f32.ui >= f16inf)
         f32.ui = f16inf - 1;

      /* Exponent and the 10 kept mantissa bits now sit in bits 13..27. */
      f16 = (uint16_t)(f32.ui >> 13);
   }

   /* Move the sign from bit 31 to bit 15. */
   f16 |= (uint16_t)(sign >> 16);

   return f16;
}
+
static inline float
util_half_to_float(uint16_t f16)
{