summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/util/u_half.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/util/u_half.py')
-rw-r--r--src/gallium/auxiliary/util/u_half.py179
1 files changed, 179 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/util/u_half.py b/src/gallium/auxiliary/util/u_half.py
new file mode 100644
index 00000000000..8007482e971
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_half.py
@@ -0,0 +1,179 @@
+# Copyright 2010 Luca Barbieri
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial
+# portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# *************************************************************************
+
+# The code is a reimplementation of the algorithm in
+# www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
+# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
+#
+# The table contents have been slightly changed so that the exponent
+# bias is now in the exponent table instead of the mantissa table (mostly
+# for cosmetic reasons, and because it theoretically allows a variant
+# that flushes denormal to zero but uses a mantissa table with 24-bit
+# entries).
+#
+# The tables are also constructed slightly differently.
+#
+
+# Note that using a 64K * 4 table is a terrible idea since it will not fit
+# in the L1 cache and will massively pollute the L2 cache as well
+#
+# These should instead fit in the L1 cache.
+#
+# TODO: we could use a denormal bias table instead of the mantissa/offset
+# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
+# but would involve more computation
+#
+# Note however that if denormals are never encountered, the L1 cache usage
+# is only about 4608 bytes anyway.
+
+table_index = None
+table_length = None
+
+def begin(t, n, l):
+ global table_length
+ global table_index
+ table_index = 0
+ table_length = l
+ print
+ print "const " + t + " " + n + "[" + str(l) + "] = {"
+
+def value(v):
+ global table_index
+ table_index += 1
+ print "\t" + hex(v) + ","
+
+def end():
+ global table_length
+ global table_index
+ print "};"
+ assert table_index == table_length
+
+print "/* This file is autogenerated by u_half.py. Do not edit directly. */"
+print "#include \"util/u_half.h\""
+
+begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
+# zero
+value(0)
+
+# denormals
+for i in xrange(1, 1024):
+ m = i << 13
+ e = 0
+
+ # normalize number
+ while (m & 0x00800000) == 0:
+ e -= 0x00800000;
+ m <<= 1;
+
+ m &= ~0x00800000;
+ e += 0x38800000;
+ value(m | e)
+
+# normals
+for i in xrange(1024, 2048):
+ value((i - 1024) << 13)
+end()
+
+begin("uint32_t", "util_half_to_float_exponent_table", 64)
+# positive zero or denormals
+value(0)
+
+# positive numbers
+for i in xrange(1, 31):
+ value(0x38000000 + (i << 23))
+
+# positive infinity/NaN
+value(0x7f800000)
+
+# negative zero or denormals
+value(0x80000000)
+
+# negative numbers
+for i in range(33, 63):
+ value(0xb8000000 + ((i - 32) << 23))
+
+# negative infinity/NaN
+value(0xff800000)
+end()
+
+begin("uint32_t", "util_half_to_float_offset_table", 64)
+# positive zero or denormals
+value(0)
+
+# positive normals
+for i in range(1, 32):
+ value(1024)
+
+# negative zero or denormals
+value(0)
+
+# negative normals
+for i in xrange(33, 64):
+ value(1024)
+end()
+
+begin("uint16_t", "util_float_to_half_base_table", 512)
+for sign in (0, 0x8000):
+ # very small numbers mapping to zero
+ for i in xrange(-127, -24):
+ value(sign | 0)
+
+ # small numbers mapping to denormals
+ for i in xrange(-24, -14):
+ value(sign | (0x400 >> (-14 -i)))
+
+ # normal numbers
+ for i in xrange(-14, 16):
+ value(sign | ((i + 15) << 10))
+
+ # large numbers mapping to infinity
+ for i in xrange(16, 128):
+ value(sign | 0x7c00)
+
+ # infinity and NaNs
+ value(sign | 0x7c00)
+end()
+
+begin("uint8_t", "util_float_to_half_shift_table", 512)
+for sign in (0, 0x8000):
+ # very small numbers mapping to zero
+ for i in xrange(-127, -24):
+ value(24)
+
+ # small numbers mapping to denormals
+ for i in xrange(-24, -14):
+ value(-1 - i)
+
+ # normal numbers
+ for i in xrange(-14, 16):
+ value(13)
+
+ # large numbers mapping to infinity
+ for i in xrange(16, 128):
+ value(24)
+
+ # infinity and NaNs
+ value(13)
+end()
+