6 files changed, 191 insertions, 170 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 1db4aaa4398..843778d8100 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -169,7 +169,8 @@ GALLIVM_CPP_SOURCES = \
 GENERATED_SOURCES = \
 	indices/u_indices_gen.c \
 	indices/u_unfilled_gen.c \
-	util/u_format_table.c
+	util/u_format_table.c \
+	util/u_half.c
 
 
 ifeq ($(MESA_LLVM),1)
@@ -198,3 +199,5 @@ util/u_format_table.c: util/u_format_table.py util/u_format_pack.py util/u_forma
 util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv
 	python util/u_format_access.py util/u_format.csv > $@
 
+util/u_half.c: util/u_half.py # rebuilt whenever the generator script changes
+	python util/u_half.py > $@
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index d0443db3f78..73d4150448f 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -29,6 +29,14 @@ env.CodeGenerate(
     source = ['util/u_format.csv'],
     command = 'python $SCRIPT $SOURCE > $TARGET'
 )
+
+env.CodeGenerate(
+    target = 'util/u_half.c',
+    script = 'util/u_half.py',
+    source = [],  # no input files: the command below runs the script without $SOURCE
+    command = 'python $SCRIPT > $TARGET'
+)
+
 env.Depends('util/u_format_table.c', [
     'util/u_format_parse.py', 
     'util/u_format_pack.py', 
diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
index d3ee1f03396..fae0a462dcb 100644
--- a/src/gallium/auxiliary/util/u_format.c
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -124,5 +124,4 @@ void
 util_format_do_init(void)
 {
    util_format_s3tc_init();
-   util_half_init();
 }
diff --git a/src/gallium/auxiliary/util/u_half.c b/src/gallium/auxiliary/util/u_half.c
deleted file mode 100644
index 4c8f8a51c52..00000000000
--- a/src/gallium/auxiliary/util/u_half.c
+++ /dev/null
@@ -1,165 +0,0 @@
-
-/*
- * Copyright 2010 Luca Barbieri
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* The code is a reimplementation of the algorithm in
- *  www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
- * "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
- *
- * The table contents have been slightly changed so that the exponent
- * bias is now in the exponent table instead of the mantissa table (mostly
- * for cosmetic reasons, and because it theoretically allows a variant
- * that flushes denormal to zero but uses a mantissa table with 24-bit
- * entries).
- *
- * The tables are also constructed slightly differently.
- */
-
-/* Note that using a 64K * 4 table is a terrible idea since it will not fit
- * in the L1 cache and will massively pollute the L2 cache as well
- *
- * These should instead fit in the L1 cache.
- *
- * TODO: we could use a denormal bias table instead of the mantissa/offset
- * tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
- * but would involve more computation
- *
- * Note however that if denormals are never encountered, the L1 cache usage
- * is only about 4608 bytes anyway.
- */
-
-#include "util/u_half.h"
-#include "util/u_init.h"
-
-uint32_t util_half_to_float_mantissa_table[2048];
-uint32_t util_half_to_float_exponent_table[64];
-uint32_t util_half_to_float_offset_table[64];
-uint16_t util_float_to_half_base_table[512];
-uint8_t util_float_to_half_shift_table[512];
-
-boolean util_half_inited;
-
-void
-util_half_do_init(void)
-{
-   int i;
-
-   /* zero */
-   util_half_to_float_mantissa_table[0] = 0;
-
-   /* denormals */
-   for(i = 1; i < 1024; ++i)
-   {
-      unsigned int m = i << 13;
-      unsigned int e = 0;
-
-      /* Normalize number */
-      while(!(m & 0x00800000))
-      {
-	 e -= 0x00800000;
-	 m <<= 1;
-      }
-      m &= ~0x00800000;
-      e += 0x38800000;
-      util_half_to_float_mantissa_table[i] = m | e;
-   }
-
-   /* normals */
-   for(i = 1024; i < 2048; ++i)
-      util_half_to_float_mantissa_table[i] = ((i - 1024) << 13);
-
-   /* positive zero or denormals */
-   util_half_to_float_exponent_table[0] = 0;
-
-   /* positive numbers */
-   for(i = 1; i <= 30; ++i)
-      util_half_to_float_exponent_table[i] = 0x38000000 + (i << 23);
-
-   /* positive infinity/NaN */
-   util_half_to_float_exponent_table[31] = 0x7f800000;
-
-   /* negative zero or denormals */
-   util_half_to_float_exponent_table[32] = 0x80000000;
-
-   /* negative numbers */
-   for(i = 33; i <= 62; ++i)
-      util_half_to_float_exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
-
-   /* negative infinity/NaN */
-   util_half_to_float_exponent_table[63] = 0xff800000;
-
-   /* positive zero or denormals */
-   util_half_to_float_offset_table[0] = 0;
-
-   /* positive normals */
-   for(i = 1; i < 32; ++i)
-      util_half_to_float_offset_table[i] = 1024;
-
-   /* negative zero or denormals */
-   util_half_to_float_offset_table[32] = 0;
-
-   /* negative normals */
-   for(i = 33; i < 64; ++i)
-      util_half_to_float_offset_table[i] = 1024;
-
-   /* very small numbers mapping to zero */
-   for(i = -127; i < -24; ++i)
-   {
-      util_float_to_half_base_table[127 + i] = 0;
-      util_float_to_half_shift_table[127 + i] = 24;
-   }
-
-   /* small numbers mapping to denormals */
-   for(i = -24; i < -14; ++i)
-   {
-      util_float_to_half_base_table[127 + i] = 0x0400 >> (-14 - i);
-      util_float_to_half_shift_table[127 + i] = -i - 1;
-   }
-
-   /* normal numbers */
-   for(i = -14; i < 16; ++i)
-   {
-      util_float_to_half_base_table[127 + i] = (i + 15) << 10;
-      util_float_to_half_shift_table[127 + i] = 13;
-   }
-
-   /* large numbers mapping to infinity */
-   for(i = 16; i < 128; ++i)
-   {
-      util_float_to_half_base_table[127 + i] = 0x7c00;
-      util_float_to_half_shift_table[127 + i] = 24;
-   }
-
-   /* infinity and NaNs */
-   util_float_to_half_base_table[255] = 0x7c00;
-   util_float_to_half_shift_table[255] = 13;
-
-   /* negative numbers */
-   for(i = 0; i < 256; ++i)
-   {
-      util_float_to_half_base_table[256 + i] = util_float_to_half_base_table[i] | 0x8000;
-      util_float_to_half_shift_table[256 + i] = util_float_to_half_shift_table[i];
-   }
-}
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
index 02f0f241936..a28b1fd1d93 100644
--- a/src/gallium/auxiliary/util/u_half.h
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -3,7 +3,6 @@
 
 #include "pipe/p_compiler.h"
 #include "util/u_math.h"
-#include "util/u_inline_init.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -56,8 +55,6 @@ util_float_to_half(float f)
    return util_floatui_to_half(i.ui);
 }
 
-UTIL_INLINE_INIT(util_half);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/auxiliary/util/u_half.py b/src/gallium/auxiliary/util/u_half.py
new file mode 100644
index 00000000000..a92f758750c
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_half.py
@@ -0,0 +1,179 @@
+# Copyright 2010 Luca Barbieri
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial
+# portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# *************************************************************************
+
+# The code is a reimplementation of the algorithm in
+#  www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
+# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
+#
+# The table contents have been slightly changed so that the exponent
+# bias is now in the exponent table instead of the mantissa table (mostly
+# for cosmetic reasons, and because it theoretically allows a variant
+# that flushes denormals to zero but uses a mantissa table with 24-bit
+# entries).
+#
+# The tables are also constructed slightly differently.
+#
+
+# Note that using a 64K * 4 table is a terrible idea since it will not fit
+# in the L1 cache and will massively pollute the L2 cache as well.
+#
+# The smaller tables generated below should instead fit in the L1 cache.
+#
+# TODO: we could use a denormal bias table instead of the mantissa/offset
+# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
+# but would involve more computation
+#
+# Note however that if denormals are never encountered, the L1 cache usage
+# is only about 4608 bytes anyway.
+
+table_index = None  # entries emitted so far for the table currently being written
+table_length = None  # declared entry count of the table currently being written
+
+def begin(t, n, l):
+	# Open the C array `n` of element type `t` with `l` entries and reset the entry counter.
+	global table_index, table_length
+	table_length = l
+	table_index = 0
+	print
+	print "%s %s[%s] = {" % (t, n, str(l))
+
+def value(v): # emit one table entry as a C hex literal and count it
+	global table_index
+	table_index += 1
+	print "\t0x%x," % v # not hex(): Python 2 appends "L" to long values, so output varied by build
+
+def end():
+	# Close the initializer, then check that exactly the declared number of entries was emitted.
+	global table_index, table_length
+	print "};"
+	assert table_length == table_index
+
+print '/* This file is autogenerated by u_half.py. Do not edit directly. */'
+print '#include "util/u_half.h"'
+
+begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
+# entry 0: zero
+value(0)
+
+# entries 1..1023: denormals, renormalized into mantissa and exponent bits
+for half_mantissa in xrange(1, 1024):
+	mantissa = half_mantissa << 13
+	exponent = 0
+
+	# shift left until bit 23 is set, lowering the exponent once per shift
+	while not (mantissa & 0x00800000):
+		mantissa <<= 1
+		exponent -= 0x00800000
+
+	exponent += 0x38800000
+	mantissa &= ~0x00800000
+	value(mantissa | exponent)
+
+# entries 1024..2047: normals, mantissa bits shifted into place
+for half_mantissa in xrange(0, 1024):
+	value(half_mantissa << 13)
+end()
+
+begin("uint32_t", "util_half_to_float_exponent_table", 64)
+# entry 0: positive zero or denormals
+value(0)
+
+# entries 1..30: positive numbers
+for exp in xrange(1, 31):
+	value(0x38000000 + (exp << 23))
+
+# entry 31: positive infinity/NaN
+value(0x7f800000)
+
+# entry 32: negative zero or denormals
+value(0x80000000)
+
+# entries 33..62: negative numbers
+for exp in xrange(1, 31):
+	value(0xb8000000 + (exp << 23))
+
+# entry 63: negative infinity/NaN
+value(0xff800000)
+end()
+
+begin("uint32_t", "util_half_to_float_offset_table", 64)
+# entry 0: positive zero or denormals
+value(0)
+
+# entries 1..31: positive normals
+for _ in xrange(31):
+	value(1024)
+
+# entry 32: negative zero or denormals
+value(0)
+
+# entries 33..63: negative normals
+for _ in xrange(31):
+	value(1024)
+end()
+
+begin("uint16_t", "util_float_to_half_base_table", 512)
+for sign_bit in (0x0000, 0x8000):
+	# very small numbers mapping to zero
+	for exp in xrange(-127, -24):
+		value(sign_bit)
+
+	# small numbers mapping to denormals
+	for exp in xrange(-24, -14):
+		value(sign_bit | (0x0400 >> (-14 - exp)))
+
+	# normal numbers
+	for exp in xrange(-14, 16):
+		value(sign_bit | ((exp + 15) << 10))
+
+	# large numbers mapping to infinity
+	for exp in xrange(16, 128):
+		value(sign_bit | 0x7c00)
+
+	# infinity and NaNs
+	value(sign_bit | 0x7c00)
+end()
+
+begin("uint8_t", "util_float_to_half_shift_table", 512)
+for _ in xrange(2):
+	# very small numbers mapping to zero
+	for exp in xrange(-127, -24):
+		value(24)
+
+	# small numbers mapping to denormals
+	for exp in xrange(-24, -14):
+		value(-(exp + 1))
+
+	# normal numbers
+	for exp in xrange(-14, 16):
+		value(13)
+
+	# large numbers mapping to infinity
+	for exp in xrange(16, 128):
+		value(24)
+
+	# infinity and NaNs
+	value(13)
+end()
+