14 files changed, 3757 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/common/depthtmp.h b/src/mesa/drivers/dri/common/depthtmp.h
new file mode 100644
index 00000000000..f3da61e5199
--- /dev/null
+++ b/src/mesa/drivers/dri/common/depthtmp.h
@@ -0,0 +1,176 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/common/depthtmp.h,v 1.5 2001/03/21 16:14:20 dawes Exp $ */
+
+#ifndef DBG
+#define DBG 0
+#endif
+
+
+#ifndef HAVE_HW_DEPTH_SPANS
+#define HAVE_HW_DEPTH_SPANS 0
+#endif
+#ifndef HAVE_HW_DEPTH_PIXELS
+#define HAVE_HW_DEPTH_PIXELS 0
+#endif
+
+#ifndef HW_READ_LOCK
+#define HW_READ_LOCK()		HW_LOCK()
+#endif
+#ifndef HW_READ_UNLOCK
+#define HW_READ_UNLOCK()	HW_UNLOCK()
+#endif
+/* Template for a driver's "write depth span" function.
+ *
+ * Takes the lock with HW_WRITE_LOCK(), replaces y with Y_FLIP(y) and then
+ * writes up to 'n' values from depth[] on that row, starting at column x.
+ *
+ * - HAVE_HW_DEPTH_SPANS non-zero: the whole job is handed to the
+ *   driver-supplied WRITE_DEPTH_SPAN() macro.
+ * - otherwise: for every pass of HW_CLIPLOOP(), CLIPSPAN() fills in x1, n1
+ *   and i, and depth[i] is stored with WRITE_DEPTH() while i < n1.  When
+ *   'mask' is non-NULL, entries whose mask[i] is zero are skipped; a NULL
+ *   mask writes every entry.
+ *
+ * TAG, LOCAL_DEPTH_VARS, Y_FLIP, the lock macros and the clip macros are not
+ * defined in this file; presumably the including driver provides them
+ * before the #include -- confirm against the driver.
+ */
+static void TAG(WriteDepthSpan)( GLcontext *ctx,
+                             GLuint n, GLint x, GLint y,
+				 const GLdepth *depth,
+				 const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_DEPTH_VARS;
+
+	 y = Y_FLIP( y );
+
+#if HAVE_HW_DEPTH_SPANS
+	 (void) x1; (void) n1;	/* only used by the software path below */
+
+	 if ( DBG ) fprintf( stderr, "WriteDepthSpan 0..%d (x1 %d)\n",
+			     (int)n, (int)x );
+
+	 WRITE_DEPTH_SPAN();
+#else
+	 HW_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN( x, y, n, x1, n1, i );	/* presumably clips the span to the current cliprect -- confirm */
+
+	       if ( DBG ) fprintf( stderr, "WriteDepthSpan %d..%d (x1 %d)\n",
+				   (int)i, (int)n1, (int)x1 );
+
+	       if ( mask ) {
+		  for ( ; i < n1 ; i++, x1++ ) {
+		     if ( mask[i] ) WRITE_DEPTH( x1, y, depth[i] );
+		  }
+	       } else {
+		  for ( ; i < n1 ; i++, x1++ ) {
+		     WRITE_DEPTH( x1, y, depth[i] );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_WRITE_UNLOCK();
+}
+/* Template for a driver's "write scattered depth pixels" function.
+ *
+ * For each i in [0, n) the value depth[i] is destined for pixel
+ * (x[i], y[i]).
+ *
+ * - HAVE_HW_DEPTH_PIXELS non-zero: everything is delegated to the
+ *   driver-supplied WRITE_DEPTH_PIXELS() macro.
+ * - otherwise: for every pass of HW_CLIPLOOP(), each pixel with a non-zero
+ *   mask[i] has its row converted with Y_FLIP() and is stored with
+ *   WRITE_DEPTH() if CLIPPIXEL() accepts it.
+ *
+ * NOTE(review): on the software path mask[] is indexed without a NULL
+ * check, so callers must always pass a mask here.
+ */
+static void TAG(WriteDepthPixels)( GLcontext *ctx,
+				   GLuint n,
+				   const GLint x[],
+				   const GLint y[],
+				   const GLdepth depth[],
+				   const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+	 GLint i;
+	 LOCAL_DEPTH_VARS;
+
+	 if ( DBG ) fprintf( stderr, "WriteDepthPixels\n" );
+
+#if HAVE_HW_DEPTH_PIXELS
+	 (void) i;	/* only used by the software path below */
+
+	 WRITE_DEPTH_PIXELS();
+#else
+	 HW_CLIPLOOP()
+	    {
+	       for ( i = 0 ; i < n ; i++ ) {	/* note: signed i compared against unsigned n */
+		  if ( mask[i] ) {
+		     const int fy = Y_FLIP( y[i] );
+		     if ( CLIPPIXEL( x[i], fy ) )
+			WRITE_DEPTH( x[i], fy, depth[i] );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Read depth spans and pixels
+ *
+ * TAG(ReadDepthSpan) is the template for a driver's "read depth span"
+ * function.  It takes the lock with HW_READ_LOCK() (which defaults to
+ * HW_LOCK() at the top of this file), replaces y with Y_FLIP(y) and fills
+ * depth[] with up to 'n' values from that row, starting at column x.
+ *
+ * - HAVE_HW_DEPTH_SPANS non-zero: the driver-supplied READ_DEPTH_SPAN()
+ *   macro does all the work.
+ * - otherwise: for every pass of HW_CLIPLOOP(), CLIPSPAN() fills in x1, n1
+ *   and i, and READ_DEPTH() stores the value of pixel (x1 + i, y) into
+ *   depth[i] while i < n1.  Entries outside that range are left untouched.
+ *
+ * NOTE(review): the write-span template advances x1 together with i,
+ * whereas this loop reads from x1 + i.  If CLIPSPAN() moves both x1 and i
+ * forward when it clips on the left, the offset is applied twice here --
+ * confirm against the driver's CLIPSPAN definition.
+ */
+static void TAG(ReadDepthSpan)( GLcontext *ctx,
+				GLuint n, GLint x, GLint y,
+				GLdepth depth[] )
+{
+   HW_READ_LOCK()
+      {
+	 GLint x1, n1;
+	 LOCAL_DEPTH_VARS;
+
+	 y = Y_FLIP( y );
+
+	 if ( DBG ) fprintf( stderr, "ReadDepthSpan\n" );
+
+#if HAVE_HW_DEPTH_SPANS
+	 (void) x1; (void) n1;	/* only used by the software path below */
+
+	 READ_DEPTH_SPAN();
+#else
+	 HW_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN( x, y, n, x1, n1, i );
+	       for ( ; i < n1 ; i++ )
+		  READ_DEPTH( depth[i], (x1+i), y );
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_READ_UNLOCK();
+}
+/* Template for a driver's "read scattered depth pixels" function.
+ *
+ * For each i in [0, n) the depth of pixel (x[i], y[i]) is destined for
+ * depth[i].
+ *
+ * - HAVE_HW_DEPTH_PIXELS non-zero: everything is delegated to the
+ *   driver-supplied READ_DEPTH_PIXELS() macro.
+ * - otherwise: for every pass of HW_CLIPLOOP(), each row is converted with
+ *   Y_FLIP() and READ_DEPTH() fills depth[i] only when CLIPPIXEL() accepts
+ *   the pixel; depth[i] is left untouched for rejected pixels.
+ */
+static void TAG(ReadDepthPixels)( GLcontext *ctx, GLuint n,
+				  const GLint x[], const GLint y[],
+				  GLdepth depth[] )
+{
+   HW_READ_LOCK()
+      {
+	 GLint i;
+	 LOCAL_DEPTH_VARS;
+
+	 if ( DBG ) fprintf( stderr, "ReadDepthPixels\n" );
+
+#if HAVE_HW_DEPTH_PIXELS
+	 (void) i;	/* only used by the software path below */
+
+	 READ_DEPTH_PIXELS();
+#else
+	 HW_CLIPLOOP()
+	    {
+	       for ( i = 0 ; i < n ;i++ ) {	/* note: signed i compared against unsigned n */
+		  int fy = Y_FLIP( y[i] );
+		  if ( CLIPPIXEL( x[i], fy ) )
+		     READ_DEPTH( depth[i], x[i], fy );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_READ_UNLOCK();
+}
+
+
+#if HAVE_HW_DEPTH_SPANS
+#undef WRITE_DEPTH_SPAN
+#undef WRITE_DEPTH_PIXELS
+#undef READ_DEPTH_SPAN
+#undef READ_DEPTH_PIXELS
+#else
+#undef WRITE_DEPTH
+#undef READ_DEPTH
+#endif
+#undef TAG
diff --git a/src/mesa/drivers/dri/common/hwlog.c b/src/mesa/drivers/dri/common/hwlog.c
new file mode 100644
index 00000000000..ff6e7d92119
--- /dev/null
+++ b/src/mesa/drivers/dri/common/hwlog.c
@@ -0,0 +1,142 @@
+/*
+ * GLX Hardware Device Driver common code 
+ * 
+ * Based on the original MGA G200 driver (c) 1999 Wittawat Yamwong
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ *    Wittawat Yamwong <[email protected]>
+ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/common/hwlog.c,v 1.3 2001/08/18 02:51:03 dawes Exp $ */
+ 
+#include "hwlog.h"
+hwlog_t hwlog = { 0,0,0, "[???] "};
+
+
+/* Should be shared, but is this a good place for it?
+ */
+#include <sys/time.h>
+#include <stdarg.h>
+
+
+/* Return a wrapping microsecond timestamp for measuring short intervals.
+ *
+ * Only the low 11 bits of the seconds counter are kept, so the result is
+ * always in [0, 2047999999] and fits in an int; it wraps every 2048 seconds.
+ * Returns 0 if the clock cannot be read.
+ */
+int usec( void )
+{
+   struct timeval tv;
+
+   /* POSIX leaves the behaviour unspecified for a non-NULL timezone
+    * argument, and the value was never used anyway.
+    */
+   if ( gettimeofday( &tv, NULL ) != 0 )
+      return 0;
+
+   return (int) ((tv.tv_sec & 2047) * 1000000 + tv.tv_usec);
+}
+
+
+#ifdef HW_LOG_ENABLED
+/* Start logging to 'filename', truncating any existing file.
+ *
+ * Any previously opened log is closed, the log level is reset to 0 and
+ * 'prefix' is stored for messages that go to stderr.
+ * Returns 0 on success, -1 if 'filename' is NULL or cannot be opened.
+ */
+int hwOpenLog(const char *filename, char *prefix)
+{
+   hwCloseLog();
+   hwSetLogLevel(0);
+   hwlog.prefix = prefix;
+
+   if (filename == NULL) {
+      return -1;
+   }
+
+   hwlog.file = fopen(filename, "w");
+   return (hwlog.file != NULL) ? 0 : -1;
+}
+
+/* Close the log file if one is open; harmless to call when none is. */
+void hwCloseLog(void)
+{
+   if (hwlog.file == NULL) {
+      return;
+   }
+
+   fclose(hwlog.file);
+   hwlog.file = NULL;
+}
+
+/* Return 1 when a log file is currently open, 0 otherwise. */
+int hwIsLogReady(void)
+{
+   return hwlog.file ? 1 : 0;
+}
+
+/* Set the current log level; messages whose level is less than or equal
+ * to it are emitted.
+ */
+void hwSetLogLevel(int level)
+{
+   hwlog.level = level;
+}
+
+/* Return the current log level. */
+int hwGetLogLevel(void)
+{
+   return hwlog.level;
+}
+
+/* printf-style front end for hwLogv(): packs the variable arguments into
+ * a va_list and forwards them unchanged.
+ */
+void hwLog(int level, const char *format, ...)
+{
+   va_list args;
+
+   va_start(args, format);
+   hwLogv(level, format, args);
+   va_end(args);
+}
+
+/* Write one formatted message to the log file and flush it.
+ *
+ * Nothing is written when no log file is open or when 'l' is greater than
+ * the current log level.
+ */
+void hwLogv(int l, const char *format, va_list ap)
+{
+   if (hwlog.file == NULL || l > hwlog.level) {
+      return;
+   }
+
+   vfprintf(hwlog.file, format, ap);
+   fflush(hwlog.file);
+}
+
+/* Emit a debug message of level 'l' (printf-style).
+ *
+ * Messages above the current log level are dropped.  When a log file is
+ * open the message goes there, preceded by the number of microseconds since
+ * the previous hwMsg(); otherwise it goes to stderr preceded by the
+ * configured prefix.
+ */
+void hwMsg(int l, const char *format, ...)
+{
+  va_list ap;
+
+  if (l > hwlog.level)
+    return;
+
+  va_start(ap, format);
+
+  if (hwIsLogReady()) {
+    int t = usec();
+
+    /* timeTemp is unsigned; convert the difference so it matches "%6i". */
+    hwLog(l, "%6i:", (int)(t - hwlog.timeTemp));
+    hwlog.timeTemp = t;
+    hwLogv(l, format, ap);
+  } else {
+    /* The prefix is data, not a format string, and may be NULL if the
+     * caller passed NULL to hwOpenLog().
+     */
+    if (hwlog.prefix)
+      fputs(hwlog.prefix, stderr);
+    vfprintf(stderr, format, ap);
+  }
+
+  va_end(ap);
+}
+
+#else /* ifdef HW_LOG_ENABLED */
+
+/* Placeholder compiled only when HW_LOG_ENABLED is not defined; always
+ * returns 0.
+ */
+int hwlogdummy(void)
+{
+   return 0;
+}
+
+#endif
+
+void hwError(const char *format, ...)
+{
+  va_list ap;
+  va_start(ap, format);
+
+  fprintf(stderr, hwlog.prefix);
+  vfprintf(stderr, format, ap);
+  hwLogv(0, format, ap);
+
+  va_end(ap);
+}
diff --git a/src/mesa/drivers/dri/common/hwlog.h b/src/mesa/drivers/dri/common/hwlog.h
new file mode 100644
index 00000000000..6c5a185552b
--- /dev/null
+++ b/src/mesa/drivers/dri/common/hwlog.h
@@ -0,0 +1,101 @@
+/*
+ * GLX Hardware Device Driver common code
+ *
+ * Based on the original MGA G200 driver (c) 1999 Wittawat Yamwong
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ *    Wittawat Yamwong <[email protected]>
+ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/common/hwlog.h,v 1.5 2001/10/31 23:22:57 tsi Exp $ */
+ 
+/* Usage:
+ * - use hwError for error messages. They are always written to stderr
+ *   (the X error output) and to the log file.
+ * - use hwMsg for debugging. Can be disabled by undefining HW_LOG_ENABLED.
+ */
+ 
+#ifndef HWLOG_INC
+#define HWLOG_INC
+
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#define DBG_LEVEL_BASE          1
+#define DBG_LEVEL_VERBOSE       10
+#define DBG_LEVEL_ENTEREXIT     20
+
+typedef struct
+{
+  FILE *file;
+  int   level;
+  unsigned int timeTemp;
+  char *prefix;
+} hwlog_t;
+
+extern hwlog_t hwlog;
+
+
+#ifdef HW_LOG_ENABLED
+
+/* open and close log file. */
+int  hwOpenLog(const char *filename, char *prefix);
+void hwCloseLog(void);
+
+/* return 1 if the log file has been successfully opened */
+int  hwIsLogReady(void);
+
+/* set current log level to 'level'. Messages with level less than or equal
+   the current log level will be written to the log file. */
+void hwSetLogLevel(int level);
+int  hwGetLogLevel(void);
+
+/* hwLog and hwLogv write a message to the log file.	*/
+/* do not call these directly, use hwMsg() instead	*/
+void hwLog(int level, const char *format, ...);
+void hwLogv(int level, const char *format, va_list ap);
+
+int usec( void );
+
+/* hwMsg writes a message to the log file or to the standard X error file. */
+void hwMsg(int level, const char *format, ...);
+
+
+#else
+
+
+/* Logging compiled out: every entry point collapses to a no-op.
+ *
+ * hwOpenLog() still records the prefix (hwError() prints it) and reports
+ * failure.  hwLog() and hwMsg() are variadic macros so that ordinary calls
+ * such as hwMsg(1, "fmt", x) preprocess to nothing; with an empty parameter
+ * list those calls would not compile at all.
+ */
+static __inline__ int hwOpenLog(const char *f, char *prefix) { (void)f; hwlog.prefix=prefix; return -1; }
+#define hwIsLogReady() (0)
+#define hwGetLogLevel() (-1)
+#define hwLogLevel(n) (0)
+#define hwLog(...)
+#define hwMsg(...)
+
+#define hwCloseLog()
+#define hwSetLogLevel(x)
+#define hwLogv(l,f,a)
+
+/* usec() is defined in hwlog.c whether or not logging is enabled. */
+int usec( void );
+
+
+#endif
+
+void hwError(const char *format, ...);
+
+
+#endif
diff --git a/src/mesa/drivers/dri/common/mm.c b/src/mesa/drivers/dri/common/mm.c
new file mode 100644
index 00000000000..b3be95471d5
--- /dev/null
+++ b/src/mesa/drivers/dri/common/mm.c
@@ -0,0 +1,197 @@
+/*
+ * GLX Hardware Device Driver common code
+ * Copyright (C) 1999 Wittawat Yamwong
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/common/mm.c,v 1.4 2002/10/30 12:51:27 alanh Exp $ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "mm.h"
+#include "hwlog.h"
+
+
+/* Print every block of 'heap' to stderr, one line per block.
+ *
+ * Each line shows the block's offset and size in hex followed by two
+ * flags: '.' for a free block or 'U' for one in use, then 'R' if the block
+ * is reserved or '.' if it is not.  A NULL heap is reported as such.
+ */
+void mmDumpMemInfo( memHeap_t *heap )
+{
+   TMemBlock *p;
+
+   /* %p requires a void pointer and %x an unsigned value. */
+   fprintf(stderr, "Memory heap %p:\n", (void *)heap);
+   if (heap == NULL) {
+      fprintf(stderr, "  heap == 0\n");
+   } else {
+      for (p = (TMemBlock *)heap; p != NULL; p = p->next) {
+         fprintf(stderr, "  Offset:%08x, Size:%08x, %c%c\n",
+                 (unsigned int)p->ofs, (unsigned int)p->size,
+                 p->free ? '.':'U',
+                 p->reserved ? 'R':'.');
+      }
+   }
+   fprintf(stderr, "End of memory blocks\n");
+}
+
+/* Create a heap that manages 'size' units starting at offset 'ofs'.
+ *
+ * The heap starts out as a single free block.  Returns NULL when 'size' is
+ * not positive or the block cannot be allocated.
+ */
+memHeap_t *mmInit(int ofs,
+		  int size)
+{
+   TMemBlock *first;
+
+   if (size <= 0)
+      return NULL;
+
+   first = (TMemBlock *) calloc(1, sizeof(TMemBlock));
+   if (first == NULL)
+      return NULL;
+
+   first->ofs = ofs;
+   first->size = size;
+   first->free = 1;
+   return (memHeap_t *)first;
+}
+
+
+/* Carve the range [startofs, startofs + size) out of free block 'p'.
+ *
+ * Whatever lies in front of the range stays in 'p'; whatever lies behind it
+ * becomes a new free block.  The carved block is marked in use, tagged with
+ * 'reserved' and 'alignment', and returned.  Returns NULL if a list node
+ * cannot be allocated.
+ */
+static TMemBlock* SliceBlock(TMemBlock *p,
+			     int startofs, int size,
+			     int reserved, int alignment)
+{
+   TMemBlock *tail;
+
+   /* Split off the leading part: 'p' keeps it, 'rest' takes the remainder. */
+   if (p->ofs < startofs) {
+      TMemBlock *rest = (TMemBlock*) calloc(1, sizeof(TMemBlock));
+
+      if (rest == NULL)
+         return NULL;
+
+      rest->ofs = startofs;
+      rest->size = p->size - (startofs - p->ofs);
+      rest->free = 1;
+      rest->next = p->next;
+
+      p->size -= rest->size;
+      p->next = rest;
+      p = rest;
+   }
+
+   /* Split off the trailing part as a new free block. */
+   if (p->size > size) {
+      tail = (TMemBlock*) calloc(1, sizeof(TMemBlock));
+      if (tail == NULL)
+         return NULL;
+
+      tail->ofs = startofs + size;
+      tail->size = p->size - size;
+      tail->free = 1;
+      tail->next = p->next;
+
+      p->size = size;
+      p->next = tail;
+   }
+
+   /* 'p' is now exactly the requested range. */
+   p->align = alignment;
+   p->free = 0;
+   p->reserved = reserved;
+   return p;
+}
+
+/* Allocate 'size' units from 'heap', aligned to 2^align2, at or after
+ * offset 'startSearch'.
+ *
+ * The first free block that can hold the aligned range is used (first
+ * fit).  Returns the new block, or NULL when the arguments are invalid, no
+ * free block is large enough, or a list node cannot be allocated.
+ */
+PMemBlock mmAllocMem( memHeap_t *heap, int size, int align2, int startSearch)
+{
+   int mask,startofs,endofs;
+   TMemBlock *p;
+
+   if (!heap || align2 < 0 || size <= 0)
+      return NULL;
+   /* 1 << align2 must stay inside a non-negative int. */
+   if (align2 >= (int)(sizeof(int) * 8) - 1)
+      return NULL;
+
+   mask = (1 << align2)-1;
+   startofs = 0;
+   for (p = (TMemBlock *)heap; p != NULL; p = p->next) {
+      if (!p->free)
+         continue;
+
+      startofs = (p->ofs + mask) & ~mask;
+      if ( startofs < startSearch ) {
+         /* Round the search start up as well so that the result keeps
+          * the requested alignment.
+          */
+         startofs = (startSearch + mask) & ~mask;
+      }
+      endofs = startofs+size;
+      if (endofs <= (p->ofs+p->size))
+         break;
+   }
+   if (!p)
+      return NULL;
+
+   p = SliceBlock(p,startofs,size,0,mask+1);
+   if (!p)
+      return NULL;	/* out of memory while splitting the block */
+   p->heap = heap;
+   return p;
+}
+
+/* Merge 'p' with the block that follows it when both are free.
+ *
+ * The following block's size is added to 'p' and its node is released.
+ * Returns 1 if a merge took place, 0 otherwise.
+ */
+static __inline__ int Join2Blocks(TMemBlock *p)
+{
+   TMemBlock *succ;
+
+   if (!p->free)
+      return 0;
+
+   succ = p->next;
+   if (succ == NULL || !succ->free)
+      return 0;
+
+   p->size += succ->size;
+   p->next = succ->next;
+   free(succ);
+   return 1;
+}
+
+/* Return block 'b' to its heap and merge it with free neighbours.
+ *
+ * Returns 0 on success or when 'b' is NULL.  Returns -1, after printing a
+ * diagnostic to stderr, when the block has no heap, is not found in its
+ * heap, is already free, or is reserved.
+ */
+int mmFreeMem(PMemBlock b)
+{
+   TMemBlock *cur;
+   TMemBlock *before = NULL;
+
+   if (b == NULL)
+      return 0;
+
+   if (b->heap == NULL) {
+      fprintf(stderr, "no heap\n");
+      return -1;
+   }
+
+   /* Walk the chain to find 'b' and remember the block in front of it. */
+   for (cur = b->heap; cur != NULL && cur != b; cur = cur->next)
+      before = cur;
+
+   if (cur == NULL) {
+      fprintf(stderr, "block not found in heap\n");
+      return -1;
+   }
+   if (cur->free) {
+      fprintf(stderr, "block already free\n");
+      return -1;
+   }
+   if (cur->reserved) {
+      fprintf(stderr, "block is reserved\n");
+      return -1;
+   }
+
+   cur->free = 1;
+   Join2Blocks(cur);		/* absorb a free successor */
+   if (before != NULL)
+      Join2Blocks(before);	/* let a free predecessor absorb us */
+   return 0;
+}
+
+
+/* Release every block node of 'heap'; a NULL heap is ignored. */
+void mmDestroy(memHeap_t *heap)
+{
+   TMemBlock *cur = (TMemBlock *)heap;
+
+   while (cur != NULL) {
+      TMemBlock *following = cur->next;
+
+      free(cur);
+      cur = following;
+   }
+}
diff --git a/src/mesa/drivers/dri/common/mm.h b/src/mesa/drivers/dri/common/mm.h
new file mode 100644
index 00000000000..d52871d39f3
--- /dev/null
+++ b/src/mesa/drivers/dri/common/mm.h
@@ -0,0 +1,82 @@
+/*
+ * GLX Hardware Device Driver common code
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef MM_INC
+#define MM_INC
+
+/* One node of a heap's block chain.  The flags are unsigned because a
+ * signed 1-bit field cannot portably hold the value 1.
+ */
+struct mem_block_t {
+  struct mem_block_t *next;	/* following block in the chain, or NULL */
+  struct mem_block_t *heap;	/* first block of the owning heap */
+  int ofs,size;			/* start offset and length of the block */
+  int align;			/* alignment recorded when the block was allocated */
+  unsigned int free:1;		/* 1 when the block is available */
+  unsigned int reserved:1;	/* 1 when the block must not be freed */
+};
+typedef struct mem_block_t TMemBlock;
+typedef struct mem_block_t *PMemBlock;
+
+/* a heap is just the first block in a chain */
+typedef struct mem_block_t memHeap_t;
+
+/* Return the size recorded in block 'b'; 'b' must not be NULL. */
+static __inline__ int mmBlockSize(PMemBlock b)
+{
+   return b->size;
+}
+
+/* Return the start offset recorded in block 'b'; 'b' must not be NULL. */
+static __inline__ int mmOffset(PMemBlock b)
+{
+   return b->ofs;
+}
+
+/* 
+ * input: ofs = offset of the first byte managed by the heap,
+ *        size = total size in bytes (must be greater than 0)
+ * return: a heap pointer if OK, NULL if error
+ */
+memHeap_t *mmInit( int ofs, int size );
+
+/*
+ * Allocate 'size' bytes with 2^align2 bytes alignment,
+ * restrict the search to free memory after 'startSearch'
+ * depth and back buffers should be in different 4mb banks
+ * to get better page hits if possible
+ * input:	size = size of block
+ *       	align2 = 2^align2 bytes alignment
+ *		startSearch = linear offset from start of heap to begin search
+ * return: pointer to the allocated block, 0 if error
+ */
+PMemBlock  mmAllocMem( memHeap_t *heap, int size, int align2, 
+		       int startSearch );
+
+/*
+ * Free a block previously returned by mmAllocMem
+ * input: pointer to a block
+ * return: 0 if OK, -1 if error
+ */
+int  mmFreeMem( PMemBlock b );
+
+/*
+ * destroy MM
+ */
+void mmDestroy( memHeap_t *mmInit );
+
+/* For debugging purposes. */
+void mmDumpMemInfo( memHeap_t *mmInit );
+
+#endif
diff --git a/src/mesa/drivers/dri/common/mmx.h b/src/mesa/drivers/dri/common/mmx.h
new file mode 100644
index 00000000000..49ce7e3e342
--- /dev/null
+++ b/src/mesa/drivers/dri/common/mmx.h
@@ -0,0 +1,560 @@
+/*	mmx.h
+
+	MultiMedia eXtensions GCC interface library for IA32.
+
+	To use this library, simply include this header file
+	and compile with GCC.  You MUST have inlining enabled
+	in order for mmx_ok() to work; this can be done by
+	simply using -O on the GCC command line.
+
+	Compiling with -DMMX_TRACE will cause detailed trace
+	output to be sent to stderr for each mmx operation.
+	This adds lots of code, and obviously slows execution to
+	a crawl, but can be very useful for debugging.
+
+	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
+	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
+	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+	AND FITNESS FOR ANY PARTICULAR PURPOSE.
+
+	1997-98 by H. Dietz and R. Fisher
+
+ History:
+	97-98*	R.Fisher	Early versions
+	980501	R.Fisher	Original Release
+	980611*	H.Dietz		Rewrite, correctly implementing inlines, and
+		R.Fisher	 including direct register accesses.
+	980616	R.Fisher	Release of 980611 as 980616.
+	980714	R.Fisher	Minor corrections to Makefile, etc.
+	980715	R.Fisher	mmx_ok() now prevents optimizer from using
+				 clobbered values.
+				mmx_ok() now checks if cpuid instruction is
+				 available before trying to use it.
+	980726*	R.Fisher	mm_support() searches for AMD 3DNow, Cyrix
+				 Extended MMX, and standard MMX.  It returns a
+				 value which is positive if any of these are
+				 supported, and can be masked with constants to
+				 see which.  mmx_ok() is now a call to this
+	980726*	R.Fisher	Added i2r support for shift functions
+	980919	R.Fisher	Fixed AMD extended feature recognition bug.
+	980921	R.Fisher	Added definition/check for _MMX_H.
+				Added "float s[2]" to mmx_t for use with
+				  3DNow and EMMX.  So same mmx_t can be used.
+	981013	R.Fisher	Fixed cpuid function 1 bug (looked at wrong reg)
+				Fixed psllq_i2r error in mmxtest.c
+
+	* Unreleased (internal or interim) versions
+
+ Notes:
+	It appears that the latest gas has the pand problem fixed, therefore
+	  I'll undefine BROKEN_PAND by default.
+	String compares may be quicker than the multiple test/jumps in vendor
+	  test sequence in mmx_ok(), but I'm not concerned with that right now.
+
+ Acknowledgments:
+	Jussi Laako for pointing out the errors ultimately found to be
+	  connected to the failure to notify the optimizer of clobbered values.
+	Roger Hardiman for reminding us that CPUID isn't everywhere, and that
+	  someone may actually try to use this on a machine without CPUID.
+	  Also for suggesting code for checking this.
+	Robert Dale for pointing out the AMD recognition bug.
+	Jimmy Mayfield and Carl Witty for pointing out the Intel recognition
+	  bug.
+	Carl Witty for pointing out the psllq_i2r test bug.
+*/
+
+#ifndef _MMX_H
+#define _MMX_H
+
+//#define MMX_TRACE
+
+/*	Warning:  at this writing, the version of GAS packaged
+	with most Linux distributions does not handle the
+	parallel AND operation mnemonic correctly.  If the
+	symbol BROKEN_PAND is defined, a slower alternative
+	coding will be used.  If execution of mmxtest results
+	in an illegal instruction fault, define this symbol.
+*/
+#undef	BROKEN_PAND
+
+
+/*	The type of a value that fits in an MMX register.
+	All members overlay the same 64 bits, so the one value can be
+	viewed as a quadword, doublewords, words, bytes or floats.
+	(note that long long constant values MUST be suffixed
+	 by LL and unsigned long long values by ULL, lest
+	 they be truncated by the compiler)
+*/
+typedef	union {
+	long long		q;	/* Quadword (64-bit) value */
+	unsigned long long	uq;	/* Unsigned Quadword */
+	int			d[2];	/* 2 Doubleword (32-bit) values */
+	unsigned int		ud[2];	/* 2 Unsigned Doubleword */
+	short			w[4];	/* 4 Word (16-bit) values */
+	unsigned short		uw[4];	/* 4 Unsigned Word */
+	char			b[8];	/* 8 Byte (8-bit) values */
+	unsigned char		ub[8];	/* 8 Unsigned Byte */
+	float			s[2];	/* 2 Single-precision (32-bit) values */
+} mmx_t;
+
+/*	Helper functions for the instruction macros that follow...
+	(note that memory-to-register, m2r, instructions are nearly
+	 as efficient as register-to-register, r2r, instructions;
+	 however, memory-to-memory instructions are really simulated
+	 as a convenience, and are only 1/3 as efficient)
+*/
+#ifdef	MMX_TRACE
+
+/*	Include the stuff for printing a trace to stderr...
+*/
+
+#include <stdio.h>
+
+#define	mmx_i2r(op, imm, reg) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace = (imm); \
+		fprintf(stderr, #op "_i2r(" #imm "=0x%016llx, ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %0, %%" #reg \
+				      : /* nothing */ \
+				      : "X" (imm)); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#define	mmx_m2r(op, mem, reg) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace = (mem); \
+		fprintf(stderr, #op "_m2r(" #mem "=0x%016llx, ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %0, %%" #reg \
+				      : /* nothing */ \
+				      : "X" (mem)); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#define	mmx_r2m(op, reg, mem) \
+	{ \
+		mmx_t mmx_trace; \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #op "_r2m(" #reg "=0x%016llx, ", mmx_trace.q); \
+		mmx_trace = (mem); \
+		fprintf(stderr, #mem "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %%" #reg ", %0" \
+				      : "=X" (mem) \
+				      : /* nothing */ ); \
+		mmx_trace = (mem); \
+		fprintf(stderr, #mem "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#define	mmx_r2r(op, regs, regd) \
+	{ \
+		mmx_t mmx_trace; \
+		__asm__ __volatile__ ("movq %%" #regs ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #op "_r2r(" #regs "=0x%016llx, ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %%" #regd ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #regd "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
+		__asm__ __volatile__ ("movq %%" #regd ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #regd "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#define	mmx_m2m(op, mems, memd) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace = (mems); \
+		fprintf(stderr, #op "_m2m(" #mems "=0x%016llx, ", mmx_trace.q); \
+		mmx_trace = (memd); \
+		fprintf(stderr, #memd "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+				      #op " %1, %%mm0\n\t" \
+				      "movq %%mm0, %0" \
+				      : "=X" (memd) \
+				      : "X" (mems)); \
+		mmx_trace = (memd); \
+		fprintf(stderr, #memd "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#else
+
+/*	These macros are a lot simpler without the tracing...
+*/
+
+/*	Immediate-to-register form: emits "op $imm, %reg".
+	Like the other helpers it expands to one statement WITHOUT a
+	trailing semicolon, so "if (c) mmx_i2r(...); else ..." works.
+*/
+#define	mmx_i2r(op, imm, reg) \
+	__asm__ __volatile__ (#op " $" #imm ", %%" #reg \
+			      : /* nothing */ \
+			      : /* nothing */)
+
+#define	mmx_m2r(op, mem, reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "X" (mem))
+
+#define	mmx_r2m(op, reg, mem) \
+	__asm__ __volatile__ (#op " %%" #reg ", %0" \
+			      : "=X" (mem) \
+			      : /* nothing */ )
+
+#define	mmx_r2r(op, regs, regd) \
+	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+#define	mmx_m2m(op, mems, memd) \
+	__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+			      #op " %1, %%mm0\n\t" \
+			      "movq %%mm0, %0" \
+			      : "=X" (memd) \
+			      : "X" (mems))
+
+#endif
+
+
+/*	1x64 MOVe Quadword
+	(this is both a load and a store...
+	 in fact, it is the only way to store)
+*/
+#define	movq_m2r(var, reg)	mmx_m2r(movq, var, reg)
+#define	movq_r2m(reg, var)	mmx_r2m(movq, reg, var)
+#define	movq_r2r(regs, regd)	mmx_r2r(movq, regs, regd)
+#define	movq(vars, vard) \
+	__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
+			      "movq %%mm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/*	1x32 MOVe Doubleword
+	(like movq, this is both load and store...
+	 but is most useful for moving things between
+	 mmx registers and ordinary registers)
+*/
+#define	movd_m2r(var, reg)	mmx_m2r(movd, var, reg)
+#define	movd_r2m(reg, var)	mmx_r2m(movd, reg, var)
+#define	movd_r2r(regs, regd)	mmx_r2r(movd, regs, regd)
+#define	movd(vars, vard) \
+	__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
+			      "movd %%mm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/*	2x32, 4x16, and 8x8 Parallel ADDs
+*/
+#define	paddd_m2r(var, reg)	mmx_m2r(paddd, var, reg)
+#define	paddd_r2r(regs, regd)	mmx_r2r(paddd, regs, regd)
+#define	paddd(vars, vard)	mmx_m2m(paddd, vars, vard)
+
+#define	paddw_m2r(var, reg)	mmx_m2r(paddw, var, reg)
+#define	paddw_r2r(regs, regd)	mmx_r2r(paddw, regs, regd)
+#define	paddw(vars, vard)	mmx_m2m(paddw, vars, vard)
+
+#define	paddb_m2r(var, reg)	mmx_m2r(paddb, var, reg)
+#define	paddb_r2r(regs, regd)	mmx_r2r(paddb, regs, regd)
+#define	paddb(vars, vard)	mmx_m2m(paddb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel ADDs using Saturation arithmetic
+*/
+#define	paddsw_m2r(var, reg)	mmx_m2r(paddsw, var, reg)
+#define	paddsw_r2r(regs, regd)	mmx_r2r(paddsw, regs, regd)
+#define	paddsw(vars, vard)	mmx_m2m(paddsw, vars, vard)
+
+#define	paddsb_m2r(var, reg)	mmx_m2r(paddsb, var, reg)
+#define	paddsb_r2r(regs, regd)	mmx_r2r(paddsb, regs, regd)
+#define	paddsb(vars, vard)	mmx_m2m(paddsb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
+*/
+#define	paddusw_m2r(var, reg)	mmx_m2r(paddusw, var, reg)
+#define	paddusw_r2r(regs, regd)	mmx_r2r(paddusw, regs, regd)
+#define	paddusw(vars, vard)	mmx_m2m(paddusw, vars, vard)
+
+#define	paddusb_m2r(var, reg)	mmx_m2r(paddusb, var, reg)
+#define	paddusb_r2r(regs, regd)	mmx_r2r(paddusb, regs, regd)
+#define	paddusb(vars, vard)	mmx_m2m(paddusb, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel SUBs
+*/
+#define	psubd_m2r(var, reg)	mmx_m2r(psubd, var, reg)
+#define	psubd_r2r(regs, regd)	mmx_r2r(psubd, regs, regd)
+#define	psubd(vars, vard)	mmx_m2m(psubd, vars, vard)
+
+#define	psubw_m2r(var, reg)	mmx_m2r(psubw, var, reg)
+#define	psubw_r2r(regs, regd)	mmx_r2r(psubw, regs, regd)
+#define	psubw(vars, vard)	mmx_m2m(psubw, vars, vard)
+
+#define	psubb_m2r(var, reg)	mmx_m2r(psubb, var, reg)
+#define	psubb_r2r(regs, regd)	mmx_r2r(psubb, regs, regd)
+#define	psubb(vars, vard)	mmx_m2m(psubb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel SUBs using Saturation arithmetic
+*/
+#define	psubsw_m2r(var, reg)	mmx_m2r(psubsw, var, reg)
+#define	psubsw_r2r(regs, regd)	mmx_r2r(psubsw, regs, regd)
+#define	psubsw(vars, vard)	mmx_m2m(psubsw, vars, vard)
+
+#define	psubsb_m2r(var, reg)	mmx_m2r(psubsb, var, reg)
+#define	psubsb_r2r(regs, regd)	mmx_r2r(psubsb, regs, regd)
+#define	psubsb(vars, vard)	mmx_m2m(psubsb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
+*/
+#define	psubusw_m2r(var, reg)	mmx_m2r(psubusw, var, reg)
+#define	psubusw_r2r(regs, regd)	mmx_r2r(psubusw, regs, regd)
+#define	psubusw(vars, vard)	mmx_m2m(psubusw, vars, vard)
+
+#define	psubusb_m2r(var, reg)	mmx_m2r(psubusb, var, reg)
+#define	psubusb_r2r(regs, regd)	mmx_r2r(psubusb, regs, regd)
+#define	psubusb(vars, vard)	mmx_m2m(psubusb, vars, vard)
+
+
+/*	4x16 Parallel MULs giving Low 4x16 portions of results
+*/
+#define	pmullw_m2r(var, reg)	mmx_m2r(pmullw, var, reg)
+#define	pmullw_r2r(regs, regd)	mmx_r2r(pmullw, regs, regd)
+#define	pmullw(vars, vard)	mmx_m2m(pmullw, vars, vard)
+
+
+/*	4x16 Parallel MULs giving High 4x16 portions of results
+*/
+#define	pmulhw_m2r(var, reg)	mmx_m2r(pmulhw, var, reg)
+#define	pmulhw_r2r(regs, regd)	mmx_r2r(pmulhw, regs, regd)
+#define	pmulhw(vars, vard)	mmx_m2m(pmulhw, vars, vard)
+
+
+/*	4x16->2x32 Parallel Mul-ADD
+	(muls like pmullw, then adds adjacent 16-bit fields
+	 in the multiply result to make the final 2x32 result)
+*/
+#define	pmaddwd_m2r(var, reg)	mmx_m2r(pmaddwd, var, reg)
+#define	pmaddwd_r2r(regs, regd)	mmx_r2r(pmaddwd, regs, regd)
+#define	pmaddwd(vars, vard)	mmx_m2m(pmaddwd, vars, vard)
+
+
+/*	1x64 bitwise AND
+*/
+#ifdef	BROKEN_PAND
+#define	pand_m2r(var, reg) \
+	{ \
+		mmx_m2r(pandn, (mmx_t) -1LL, reg); \
+		mmx_m2r(pandn, var, reg); \
+	}
+/*	BROKEN_PAND fallback for pand_r2r, built from two pandn steps.
+	Each helper call needs its own terminating semicolon because the
+	non-trace helpers expand to a bare asm statement.
+*/
+#define	pand_r2r(regs, regd) \
+	{ \
+		mmx_m2r(pandn, (mmx_t) -1LL, regd); \
+		mmx_r2r(pandn, regs, regd); \
+	}
+#define	pand(vars, vard) \
+	{ \
+		movq_m2r(vard, mm0); \
+		mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
+		mmx_m2r(pandn, vars, mm0); \
+		movq_r2m(mm0, vard); \
+	}
+#else
+#define	pand_m2r(var, reg)	mmx_m2r(pand, var, reg)
+#define	pand_r2r(regs, regd)	mmx_r2r(pand, regs, regd)
+#define	pand(vars, vard)	mmx_m2m(pand, vars, vard)
+#endif
+
+
+/*	1x64 bitwise AND with Not the destination
+*/
+#define	pandn_m2r(var, reg)	mmx_m2r(pandn, var, reg)
+#define	pandn_r2r(regs, regd)	mmx_r2r(pandn, regs, regd)
+#define	pandn(vars, vard)	mmx_m2m(pandn, vars, vard)
+
+
+/*	1x64 bitwise OR
+*/
+#define	por_m2r(var, reg)	mmx_m2r(por, var, reg)
+#define	por_r2r(regs, regd)	mmx_r2r(por, regs, regd)
+#define	por(vars, vard)	mmx_m2m(por, vars, vard)
+
+
+/*	1x64 bitwise eXclusive OR
+*/
+#define	pxor_m2r(var, reg)	mmx_m2r(pxor, var, reg)
+#define	pxor_r2r(regs, regd)	mmx_r2r(pxor, regs, regd)
+#define	pxor(vars, vard)	mmx_m2m(pxor, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
+	(each result field is all ones (-1) where the fields match, else 0)
+*/
+#define	pcmpeqd_m2r(var, reg)	mmx_m2r(pcmpeqd, var, reg)
+#define	pcmpeqd_r2r(regs, regd)	mmx_r2r(pcmpeqd, regs, regd)
+#define	pcmpeqd(vars, vard)	mmx_m2m(pcmpeqd, vars, vard)
+
+#define	pcmpeqw_m2r(var, reg)	mmx_m2r(pcmpeqw, var, reg)
+#define	pcmpeqw_r2r(regs, regd)	mmx_r2r(pcmpeqw, regs, regd)
+#define	pcmpeqw(vars, vard)	mmx_m2m(pcmpeqw, vars, vard)
+
+#define	pcmpeqb_m2r(var, reg)	mmx_m2r(pcmpeqb, var, reg)
+#define	pcmpeqb_r2r(regs, regd)	mmx_r2r(pcmpeqb, regs, regd)
+#define	pcmpeqb(vars, vard)	mmx_m2m(pcmpeqb, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
+	(signed dest > source; each result field is -1 where true, else 0)
+*/
+#define	pcmpgtd_m2r(var, reg)	mmx_m2r(pcmpgtd, var, reg)
+#define	pcmpgtd_r2r(regs, regd)	mmx_r2r(pcmpgtd, regs, regd)
+#define	pcmpgtd(vars, vard)	mmx_m2m(pcmpgtd, vars, vard)
+
+#define	pcmpgtw_m2r(var, reg)	mmx_m2r(pcmpgtw, var, reg)
+#define	pcmpgtw_r2r(regs, regd)	mmx_r2r(pcmpgtw, regs, regd)
+#define	pcmpgtw(vars, vard)	mmx_m2m(pcmpgtw, vars, vard)
+
+#define	pcmpgtb_m2r(var, reg)	mmx_m2r(pcmpgtb, var, reg)
+#define	pcmpgtb_r2r(regs, regd)	mmx_r2r(pcmpgtb, regs, regd)
+#define	pcmpgtb(vars, vard)	mmx_m2m(pcmpgtb, vars, vard)
+
+
+/*	1x64, 2x32, and 4x16 Parallel Shift Left Logical
+	(_i2r takes an immediate count; the other forms take it from var/regs) */
+#define	psllq_i2r(imm, reg)	mmx_i2r(psllq, imm, reg)
+#define	psllq_m2r(var, reg)	mmx_m2r(psllq, var, reg)
+#define	psllq_r2r(regs, regd)	mmx_r2r(psllq, regs, regd)
+#define	psllq(vars, vard)	mmx_m2m(psllq, vars, vard)
+
+#define	pslld_i2r(imm, reg)	mmx_i2r(pslld, imm, reg)
+#define	pslld_m2r(var, reg)	mmx_m2r(pslld, var, reg)
+#define	pslld_r2r(regs, regd)	mmx_r2r(pslld, regs, regd)
+#define	pslld(vars, vard)	mmx_m2m(pslld, vars, vard)
+
+#define	psllw_i2r(imm, reg)	mmx_i2r(psllw, imm, reg)
+#define	psllw_m2r(var, reg)	mmx_m2r(psllw, var, reg)
+#define	psllw_r2r(regs, regd)	mmx_r2r(psllw, regs, regd)
+#define	psllw(vars, vard)	mmx_m2m(psllw, vars, vard)
+
+
+/*	1x64, 2x32, and 4x16 Parallel Shift Right Logical
+	(zero bits are shifted in at the top of each field) */
+#define	psrlq_i2r(imm, reg)	mmx_i2r(psrlq, imm, reg)
+#define	psrlq_m2r(var, reg)	mmx_m2r(psrlq, var, reg)
+#define	psrlq_r2r(regs, regd)	mmx_r2r(psrlq, regs, regd)
+#define	psrlq(vars, vard)	mmx_m2m(psrlq, vars, vard)
+
+#define	psrld_i2r(imm, reg)	mmx_i2r(psrld, imm, reg)
+#define	psrld_m2r(var, reg)	mmx_m2r(psrld, var, reg)
+#define	psrld_r2r(regs, regd)	mmx_r2r(psrld, regs, regd)
+#define	psrld(vars, vard)	mmx_m2m(psrld, vars, vard)
+
+#define	psrlw_i2r(imm, reg)	mmx_i2r(psrlw, imm, reg)
+#define	psrlw_m2r(var, reg)	mmx_m2r(psrlw, var, reg)
+#define	psrlw_r2r(regs, regd)	mmx_r2r(psrlw, regs, regd)
+#define	psrlw(vars, vard)	mmx_m2m(psrlw, vars, vard)
+
+
+/*	2x32 and 4x16 Parallel Shift Right Arithmetic
+	(the sign bit of each field is replicated into the vacated bits) */
+#define	psrad_i2r(imm, reg)	mmx_i2r(psrad, imm, reg)
+#define	psrad_m2r(var, reg)	mmx_m2r(psrad, var, reg)
+#define	psrad_r2r(regs, regd)	mmx_r2r(psrad, regs, regd)
+#define	psrad(vars, vard)	mmx_m2m(psrad, vars, vard)
+
+#define	psraw_i2r(imm, reg)	mmx_i2r(psraw, imm, reg)
+#define	psraw_m2r(var, reg)	mmx_m2r(psraw, var, reg)
+#define	psraw_r2r(regs, regd)	mmx_r2r(psraw, regs, regd)
+#define	psraw(vars, vard)	mmx_m2m(psraw, vars, vard)
+
+
+/*	2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
+	(narrowed dest fields fill the low half of dest, source fields the high half)
+*/
+#define	packssdw_m2r(var, reg)	mmx_m2r(packssdw, var, reg)
+#define	packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
+#define	packssdw(vars, vard)	mmx_m2m(packssdw, vars, vard)
+
+#define	packsswb_m2r(var, reg)	mmx_m2r(packsswb, var, reg)
+#define	packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
+#define	packsswb(vars, vard)	mmx_m2m(packsswb, vars, vard)
+
+
+/*	4x16->8x8 PACK and Unsigned Saturate
+	(narrowed dest fields fill the low half of dest, source fields the high half)
+*/
+#define	packuswb_m2r(var, reg)	mmx_m2r(packuswb, var, reg)
+#define	packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
+#define	packuswb(vars, vard)	mmx_m2m(packuswb, vars, vard)
+
+
+/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
+	(interleaves the low half of dest with the low half of source:
+	 each dest field becomes the low part of a result field, and the
+	 matching source field the high part) */
+#define	punpckldq_m2r(var, reg)	mmx_m2r(punpckldq, var, reg)
+#define	punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
+#define	punpckldq(vars, vard)	mmx_m2m(punpckldq, vars, vard)
+
+#define	punpcklwd_m2r(var, reg)	mmx_m2r(punpcklwd, var, reg)
+#define	punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
+#define	punpcklwd(vars, vard)	mmx_m2m(punpcklwd, vars, vard)
+
+#define	punpcklbw_m2r(var, reg)	mmx_m2r(punpcklbw, var, reg)
+#define	punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
+#define	punpcklbw(vars, vard)	mmx_m2m(punpcklbw, vars, vard)
+
+
+/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
+	(interleaves the high half of dest with the high half of source:
+	 each dest field becomes the low part of a result field, and the
+	 matching source field the high part) */
+#define	punpckhdq_m2r(var, reg)	mmx_m2r(punpckhdq, var, reg)
+#define	punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
+#define	punpckhdq(vars, vard)	mmx_m2m(punpckhdq, vars, vard)
+
+#define	punpckhwd_m2r(var, reg)	mmx_m2r(punpckhwd, var, reg)
+#define	punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
+#define	punpckhwd(vars, vard)	mmx_m2m(punpckhwd, vars, vard)
+
+#define	punpckhbw_m2r(var, reg)	mmx_m2r(punpckhbw, var, reg)
+#define	punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
+#define	punpckhbw(vars, vard)	mmx_m2m(punpckhbw, vars, vard)
+
+
+/*	Empty MMx State
+	(used to clean-up when going from mmx to float use
+	 of the registers that are shared by both; note that
+	 there is no float-to-mmx operation needed, because
+	 only the float tag word info is corruptible).
+	With MMX_TRACE defined, each emms() also prints "emms()" to stderr. */
+#ifdef	MMX_TRACE
+
+#define	emms() \
+	{ \
+		fprintf(stderr, "emms()\n"); \
+		__asm__ __volatile__ ("emms"); \
+	}
+
+#else
+
+#define	emms()			__asm__ __volatile__ ("emms")
+
+#endif
+
+#endif
+
diff --git a/src/mesa/drivers/dri/common/spantmp.h b/src/mesa/drivers/dri/common/spantmp.h
new file mode 100644
index 00000000000..888be0465e5
--- /dev/null
+++ b/src/mesa/drivers/dri/common/spantmp.h
@@ -0,0 +1,259 @@
+#ifndef DBG
+#define DBG 0	/* the "if (DBG) fprintf" traces below are off unless the includer sets DBG */
+#endif
+/* Lock hooks: default to the includer's generic HW_LOCK()/HW_UNLOCK(). */
+#ifndef HW_WRITE_LOCK
+#define HW_WRITE_LOCK()		HW_LOCK()
+#endif
+
+#ifndef HW_WRITE_UNLOCK
+#define HW_WRITE_UNLOCK()	HW_UNLOCK()
+#endif
+
+#ifndef HW_READ_LOCK
+#define HW_READ_LOCK()		HW_LOCK()
+#endif
+
+#ifndef HW_READ_UNLOCK
+#define HW_READ_UNLOCK()	HW_UNLOCK()
+#endif
+/* Clip-loop hooks: default to the includer's generic HW_CLIPLOOP(). */
+#ifndef HW_READ_CLIPLOOP
+#define HW_READ_CLIPLOOP()	HW_CLIPLOOP()
+#endif
+
+#ifndef HW_WRITE_CLIPLOOP
+#define HW_WRITE_CLIPLOOP()	HW_CLIPLOOP()
+#endif
+
+/* Write a span of up to n RGBA pixels at (x, y); mask[] is optional (may be NULL). */
+static void TAG(WriteRGBASpan)( const GLcontext *ctx,
+				GLuint n, GLint x, GLint y,
+				const GLubyte rgba[][4],
+				const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_VARS;
+
+	 y = Y_FLIP(y);
+	 /* NOTE(review): the clip loop presumably runs once per clip rectangle - confirm in the driver. */
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       /* From here on: i indexes rgba[]/mask[], x1 is the column, n1 the pixels left. */
+	       if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n",
+				(int)i, (int)n1, (int)x1);
+
+	       if (mask)
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+			WRITE_RGBA( x1, y,
+				    rgba[i][0], rgba[i][1],
+				    rgba[i][2], rgba[i][3] );
+	       }
+	       else
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     WRITE_RGBA( x1, y,
+				 rgba[i][0], rgba[i][1],
+				 rgba[i][2], rgba[i][3] );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+/* Write a span of up to n RGB pixels at (x, y) with alpha 255; mask[] may be NULL. */
+static void TAG(WriteRGBSpan)( const GLcontext *ctx,
+			       GLuint n, GLint x, GLint y,
+			       const GLubyte rgb[][3],
+			       const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_VARS;
+
+	 y = Y_FLIP(y);
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       /* From here on: i indexes rgb[]/mask[], x1 is the column, n1 the pixels left. */
+	       if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n",
+				(int)i, (int)n1, (int)x1);
+
+	       if (mask)
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+			WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
+	       }
+	       else
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+/* Write n scattered RGBA pixels at (x[i], y[i]); only entries with mask[i] set are written. */
+static void TAG(WriteRGBAPixels)( const GLcontext *ctx,
+			       GLuint n,
+			       const GLint x[],
+			       const GLint y[],
+			       const GLubyte rgba[][4],
+			       const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+	 GLint i;
+	 LOCAL_VARS;
+
+	 if (DBG) fprintf(stderr, "WriteRGBAPixels\n");
+	 /* mask[] is indexed unconditionally here, so it must not be NULL. */
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       for (i=0;i<n;i++)
+	       {
+		  if (mask[i]) {
+		     const int fy = Y_FLIP(y[i]);
+		     if (CLIPPIXEL(x[i],fy))
+			WRITE_RGBA( x[i], fy,
+				    rgba[i][0], rgba[i][1],
+				    rgba[i][2], rgba[i][3] );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/* Write a span of up to n pixels of one color at (x, y); a NULL mask writes them all. */
+static void TAG(WriteMonoRGBASpan)( const GLcontext *ctx,
+				    GLuint n, GLint x, GLint y,
+				    const GLchan color[4],
+				    const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_VARS;
+	 INIT_MONO_PIXEL(p, color);
+
+	 y = Y_FLIP( y );
+	 if (DBG) fprintf(stderr, "WriteMonoRGBASpan\n");
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       /* Treat a NULL mask as "write every pixel", like WriteRGBASpan does. */
+	       for (;n1>0;i++,x1++,n1--)
+		  if (!mask || mask[i])
+		     WRITE_PIXEL( x1, y, p );
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/* Write n scattered pixels of one color at (x[i], y[i]) wherever mask[i] is set. */
+static void TAG(WriteMonoRGBAPixels)( const GLcontext *ctx,
+				      GLuint n,
+				      const GLint x[], const GLint y[],
+				      const GLchan color[],
+				      const GLubyte mask[] ) 
+{
+   HW_WRITE_LOCK()
+      {
+	 GLint i;
+	 LOCAL_VARS;
+	 INIT_MONO_PIXEL(p, color);
+
+	 if (DBG) fprintf(stderr, "WriteMonoRGBAPixels\n");
+	 /* mask[] is indexed unconditionally here, so it must not be NULL. */
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       for (i=0;i<n;i++)
+		  if (mask[i]) {
+		     int fy = Y_FLIP(y[i]);
+		     if (CLIPPIXEL( x[i], fy ))
+			WRITE_PIXEL( x[i], fy, p );
+		  }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/* Read a span of up to n pixels at (x, y) into rgba[]; entries the loop skips are left untouched. */
+static void TAG(ReadRGBASpan)( const GLcontext *ctx,
+			       GLuint n, GLint x, GLint y,
+			       GLubyte rgba[][4])
+{
+   HW_READ_LOCK()
+      {
+	 GLint x1,n1;
+	 LOCAL_VARS;
+
+	 y = Y_FLIP(y);
+
+	 if (DBG) fprintf(stderr, "ReadRGBASpan\n");
+	 /* After CLIPSPAN: i indexes rgba[], x1 is the column, n1 the pixels left. */
+	 HW_READ_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       for (;n1>0;i++,x1++,n1--)
+		  READ_RGBA( rgba[i], x1, y );
+	    }
+         HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+/* Read n scattered pixels at (x[i], y[i]) into rgba[i] wherever mask[i] is set. */
+static void TAG(ReadRGBAPixels)( const GLcontext *ctx,
+				 GLuint n, const GLint x[], const GLint y[],
+				 GLubyte rgba[][4], const GLubyte mask[] )
+{
+   HW_READ_LOCK()
+      {
+	 GLint i;
+	 LOCAL_VARS;
+
+	 if (DBG) fprintf(stderr, "ReadRGBAPixels\n");
+	 /* mask[] is indexed unconditionally here, so it must not be NULL. */
+	 HW_READ_CLIPLOOP()
+	    {
+	       for (i=0;i<n;i++)
+		  if (mask[i]) {
+		     int fy = Y_FLIP( y[i] );
+		     if (CLIPPIXEL( x[i], fy ))
+			READ_RGBA( rgba[i], x[i], fy );
+		  }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+
+
+/* Drop the per-instantiation macros so the includer can define them afresh. */
+#undef WRITE_PIXEL
+#undef WRITE_RGBA
+#undef READ_RGBA
+#undef TAG
diff --git a/src/mesa/drivers/dri/common/stenciltmp.h b/src/mesa/drivers/dri/common/stenciltmp.h
new file mode 100644
index 00000000000..365e9811ac3
--- /dev/null
+++ b/src/mesa/drivers/dri/common/stenciltmp.h
@@ -0,0 +1,147 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/common/stenciltmp.h,v 1.3 2001/03/21 16:14:20 dawes Exp $ */
+
+#ifndef DBG
+#define DBG 0	/* the "if (DBG) fprintf" traces below are off unless the includer sets DBG */
+#endif
+/* Lock hooks: default to the includer's generic HW_LOCK()/HW_UNLOCK(). */
+#ifndef HW_WRITE_LOCK
+#define HW_WRITE_LOCK()		HW_LOCK()
+#endif
+#ifndef HW_WRITE_UNLOCK
+#define HW_WRITE_UNLOCK()	HW_UNLOCK()
+#endif
+
+#ifndef HW_READ_LOCK
+#define HW_READ_LOCK()		HW_LOCK()
+#endif
+#ifndef HW_READ_UNLOCK
+#define HW_READ_UNLOCK()	HW_UNLOCK()
+#endif
+/* Write a span of up to n stencil values at (x, y); a NULL mask writes every pixel. */
+static void TAG(WriteStencilSpan)( GLcontext *ctx,
+				   GLuint n, GLint x, GLint y,
+				   const GLstencil *stencil,
+				   const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_STENCIL_VARS;
+
+	 y = Y_FLIP(y);
+
+	 HW_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+
+	       if (DBG) fprintf(stderr, "WriteStencilSpan %d..%d (x1 %d)\n",
+				(int)i, (int)n1, (int)x1);
+	       /* n1 is a count (as in spantmp.h): count it down so left-clipped spans are complete. */
+	       if (mask)
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+			WRITE_STENCIL( x1, y, stencil[i] );
+	       }
+	       else
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     WRITE_STENCIL( x1, y, stencil[i] );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/* Write n scattered stencil values at (x[i], y[i]) wherever mask[i] is set. */
+static void TAG(WriteStencilPixels)( GLcontext *ctx,
+				     GLuint n, 
+				     const GLint x[], 
+				     const GLint y[],
+				     const GLstencil stencil[], 
+				     const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+	 GLint i;
+	 LOCAL_STENCIL_VARS;
+
+	 if (DBG) fprintf(stderr, "WriteStencilPixels\n");
+	 /* mask[] is indexed unconditionally here, so it must not be NULL. */
+	 HW_CLIPLOOP()
+	    {
+	       for (i=0;i<n;i++)
+	       {
+		  if (mask[i]) {
+		     const int fy = Y_FLIP(y[i]);
+		     if (CLIPPIXEL(x[i],fy))
+			WRITE_STENCIL( x[i], fy, stencil[i] );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Read stencil spans and pixels.  ReadStencilSpan fills stencil[] for the part
+ * of the span at (x, y) that survives clipping; other entries are untouched. */
+static void TAG(ReadStencilSpan)( GLcontext *ctx,
+				  GLuint n, GLint x, GLint y,
+				  GLstencil stencil[])
+{
+   HW_READ_LOCK()
+      {
+	 GLint x1,n1;
+	 LOCAL_STENCIL_VARS;
+
+	 y = Y_FLIP(y);
+
+	 if (DBG) fprintf(stderr, "ReadStencilSpan\n");
+	 /* After CLIPSPAN: i indexes stencil[], x1 is the clipped column, n1 a pixel count. */
+	 HW_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       for (;n1>0;i++,x1++,n1--)
+		  READ_STENCIL( stencil[i], x1, y );
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+/* Read n scattered stencil values at (x[i], y[i]); entries that fail CLIPPIXEL are left untouched. */
+static void TAG(ReadStencilPixels)( GLcontext *ctx, GLuint n, 
+				    const GLint x[], const GLint y[],
+				    GLstencil stencil[] )
+{
+   HW_READ_LOCK()
+      {
+	 GLint i;
+	 LOCAL_STENCIL_VARS;
+
+	 if (DBG) fprintf(stderr, "ReadStencilPixels\n");
+ 
+	 HW_CLIPLOOP()
+	    {
+	       for (i=0;i<n;i++) {
+		  int fy = Y_FLIP( y[i] );
+		  if (CLIPPIXEL( x[i], fy ))
+		     READ_STENCIL( stencil[i], x[i], fy );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+
+
+/* Drop the per-instantiation macros so the includer can define them afresh. */
+#undef WRITE_STENCIL
+#undef READ_STENCIL
+#undef TAG
diff --git a/src/mesa/drivers/dri/common/texmem.c b/src/mesa/drivers/dri/common/texmem.c
new file mode 100644
index 00000000000..a8878b694d6
--- /dev/null
+++ b/src/mesa/drivers/dri/common/texmem.c
@@ -0,0 +1,1173 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <[email protected]>
+ *    Keith Whitwell <[email protected]>
+ *    Kevin E. Martin <[email protected]>
+ *    Gareth Hughes <[email protected]>
+ */
+/* $XFree86:$ */
+
+/** \file texmem.c
+ * Implements all of the device-independent texture memory management.
+ * 
+ * Currently, only a simple LRU texture memory management policy is
+ * implemented.  In the (hopefully very near) future, better policies will be
+ * implemented.  The idea is that the DRI should be able to run in one of two
+ * modes.  In the default mode the DRI will dynamically attempt to discover
+ * the best texture management policy for the running application.  In the
+ * other mode, the user (via some sort of as yet TBD mechanism) will select
+ * a texture management policy that is known to work well with the
+ * application.
+ */
+
+#include "texmem.h"
+#include "simple_list.h"
+#include "imports.h"
+
+#include <assert.h>
+
+
+/* Counter that heaps increment when no swap-counter location has been supplied. */
+static unsigned dummy_swap_counter;
+
+
+/**
+ * Count the bits needed to represent a value: \f$\lfloor\log_2 n\rfloor + 1\f$
+ * for \a n >= 1, and 1 for \a n == 0.  The loop is linear in the bit width,
+ * which does not matter because system performance does not depend on it.
+ *
+ * \param n Value to be measured
+ * \return Number of significant bits in \a n (1 when \a n is 0)
+ */
+
+static unsigned
+driLog2( unsigned n )
+{
+   unsigned   bits = 1;
+
+   while ( n > 1 ) {
+      n >>= 1;
+      bits++;
+   }
+
+   return bits;
+}
+
+
+
+
+/**
+ * Report whether a texture currently occupies textureable memory.  Which
+ * memory that is depends on the driver: it may be on-card memory, AGP memory,
+ * or any other memory from which the hardware can read texels directly.
+ * A texture is resident when it has driver data that owns a memory block.
+ * 
+ * This function is intended to be used as the \c IsTextureResident function
+ * in the device's \c dd_function_table.
+ * 
+ * \param ctx GL context pointer (currently unused)
+ * \param texObj Texture object to be tested
+ */
+
+GLboolean
+driIsTextureResident( GLcontext * ctx, 
+		      struct gl_texture_object * texObj )
+{
+   const driTextureObject * drv;
+
+   drv = (const driTextureObject *) texObj->DriverData;
+   if ( drv == NULL ) return 0;
+   return( drv->memBlock != NULL );
+}
+
+
+
+
+/**
+ * (Re)initialize the global circular LRU list.  The last element
+ * in the array (\a heap->nrRegions) is the sentinel.  Keeping it
+ * at the end of the array allows the other elements of the array
+ * to be addressed rationally when looking up objects at a particular
+ * location in texture memory.  Also resets \c heap->global_age[0] to 0.
+ * 
+ * \param heap Texture heap to be reset
+ */
+
+static void resetGlobalLRU( driTexHeap * heap )
+{
+   drmTextureRegionPtr list = heap->global_regions;
+   unsigned       sz = 1U << heap->logGranularity;
+   unsigned       i;
+   /* Chain every granule that fits in the heap to its index neighbours, age 0. */
+   for (i = 0 ; (i+1) * sz <= heap->size ; i++) {
+      list[i].prev = i-1;
+      list[i].next = i+1;
+      list[i].age = 0;
+   }
+   /* i-- makes i the last chained entry; then tie both ends to the sentinel. */
+   i--;
+   list[0].prev = heap->nrRegions;
+   list[i].prev = i-1;
+   list[i].next = heap->nrRegions;
+   list[heap->nrRegions].prev = i;
+   list[heap->nrRegions].next = 0;
+   heap->global_age[0] = 0;
+}
+
+/**
+ * Print out debugging information about the local texture LRU: every
+ * resident object of the heap, then every object on its swapped-out list.
+ * \param heap Texture heap to be printed
+ * \param callername Name of calling function
+ */
+static void printLocalLRU( driTexHeap * heap, const char *callername  )
+{
+   driTextureObject *t;
+   unsigned sz = 1U << heap->logGranularity;
+
+   fprintf( stderr, "%s in %s:\nLocal LRU, heap %d:\n", 
+	    __FUNCTION__, callername, heap->heapId );
+   /* Objects without a memory block are skipped; a NULL tObj marks a placeholder. */
+   foreach ( t, &heap->texture_objects ) {
+      if (!t->memBlock)
+	 continue;
+      if (!t->tObj) {
+	 fprintf( stderr, "Placeholder (%p) %d at 0x%x sz 0x%x\n",
+		  t,
+		  t->memBlock->ofs / sz,
+		  t->memBlock->ofs,
+		  t->memBlock->size );
+      } else {
+	 fprintf( stderr, "Texture (%p) at 0x%x sz 0x%x\n",
+		  t,
+		  t->memBlock->ofs,
+		  t->memBlock->size );
+      }
+   }
+   foreach ( t, heap->swapped_objects ) {
+      if (!t->tObj) {
+	 fprintf( stderr, "Swapped Placeholder (%p)\n", t );
+      } else {
+	 fprintf( stderr, "Swapped Texture (%p)\n", t );
+      }
+   }
+
+   fprintf( stderr, "\n" );
+}
+
+/**
+ * Print out debugging information about the global texture LRU, following
+ * the \c next links from the sentinel entry (index \c heap->nrRegions).
+ * \param heap Texture heap to be printed
+ * \param callername Name of calling function
+ */
+static void printGlobalLRU( driTexHeap * heap, const char *callername )
+{
+   drmTextureRegionPtr list = heap->global_regions;
+   int i, j;
+
+   fprintf( stderr, "%s in %s:\nGlobal LRU, heap %d list %p:\n", 
+	    __FUNCTION__, callername, heap->heapId, list );
+   /* Walk from the sentinel, stopping after nrRegions steps or on reaching it again. */
+   for ( i = 0, j = heap->nrRegions ; i < heap->nrRegions ; i++ ) {
+      fprintf( stderr, "list[%d] age %d next %d prev %d in_use %d\n",
+	       j, list[j].age, list[j].next, list[j].prev, list[j].in_use );
+      j = list[j].next;
+      if ( j == heap->nrRegions ) break;
+   }
+   /* Not back at the sentinel: report a loop and dump the entries in index order. */
+   if ( j != heap->nrRegions ) {
+      fprintf( stderr, "Loop detected in global LRU\n" );
+      for ( i = 0 ; i < heap->nrRegions ; i++ ) {
+	 fprintf( stderr, "list[%d] age %d next %d prev %d in_use %d\n",
+		  i, list[i].age, list[i].next, list[i].prev, list[i].in_use );
+      }
+   }
+
+   fprintf( stderr, "\n" );
+}
+
+
+/**
+ * Mark a local texture as most recently used.  The client calls this
+ * whenever it touches the texture; a texture with no heap is ignored.
+ * 
+ * \param t Texture object that the client has accessed
+ */
+
+void driUpdateTextureLRU( driTextureObject * t )
+{
+   driTexHeap * const  heap = t->heap;
+   drmTextureRegionPtr regions;
+   unsigned            gran;
+   unsigned            first;
+   unsigned            last;
+   unsigned            r;
+
+
+   if ( heap == NULL ) {
+      return;
+   }
+
+   /* First and last granularity-sized regions covered by the texture. */
+   gran = heap->logGranularity;
+   first = t->memBlock->ofs >> gran;
+   last = (t->memBlock->ofs + t->memBlock->size - 1) >> gran;
+
+   /* Advance the global age and adopt it as this heap's local age. */
+   heap->local_age = ++heap->global_age[0];
+   regions = heap->global_regions;
+
+   /* Update the context's local LRU 
+    */
+   move_to_head( & heap->texture_objects, t );
+
+   /* Then move each covered region to the head of the global LRU. */
+   for ( r = first ; r <= last ; r++ ) {
+      regions[r].in_use = 1;
+      regions[r].age = heap->local_age;
+
+      /* Unlink region r from wherever it currently sits... */
+      regions[(unsigned)regions[r].next].prev = regions[r].prev;
+      regions[(unsigned)regions[r].prev].next = regions[r].next;
+
+      /* ...and relink it directly after the sentinel entry. */
+      regions[r].prev = heap->nrRegions;
+      regions[r].next = regions[heap->nrRegions].next;
+      regions[(unsigned)regions[heap->nrRegions].next].prev = r;
+      regions[heap->nrRegions].next = r;
+   }
+
+   /* Debug dumps, compiled in but disabled. */
+   if ( 0 ) {
+      printGlobalLRU( heap, __FUNCTION__ );
+      printLocalLRU( heap, __FUNCTION__ );
+   }
+}
+
+
+
+
+/**
+ * Keep track of swapped out texture objects.  Frees the object's memory
+ * block, if any, and sets all \c dirty_images bits for faces 0 through 5.
+ * 
+ * \param t Texture object to be "swapped" out of its texture heap
+ */
+
+void driSwapOutTextureObject( driTextureObject * t )
+{
+   unsigned   f;
+
+   if ( t->memBlock == NULL ) {
+      assert( t->heap == NULL );
+   }
+   else {
+      driTexHeap * const owner = t->heap;
+
+      assert( owner != NULL );
+      mmFreeMem( t->memBlock );
+      t->memBlock = NULL;
+
+      /* Heap timestamp becomes the later of its own and the object's. */
+      if ( owner->timestamp < t->timestamp )
+	 owner->timestamp = t->timestamp;
+
+      owner->texture_swaps[0]++;
+      move_to_tail( owner->swapped_objects, t );
+      t->heap = NULL;
+   }
+
+   for ( f = 0 ; f < 6 ; f++ ) t->dirty_images[f] = ~0;
+}
+
+
+
+
+/**
+ * Destroy hardware state associated with texture \a t.  If \a t still holds
+ * texture memory, the \a destroy_texture_object method of its heap is called.
+ * The object is then unlinked from its list and freed.
+ * 
+ * \param t Texture object to be destroyed (\c NULL is accepted and ignored)
+ */
+
+void driDestroyTextureObject( driTextureObject * t )
+{
+   driTexHeap * heap;
+
+
+   if ( 0 ) {
+      fprintf( stderr, "[%s:%d] freeing %p (tObj = %p, DriverData = %p)\n",
+	       __FILE__, __LINE__,
+	       t,
+	       (t != NULL) ? t->tObj : NULL,
+	       (t != NULL && t->tObj != NULL) ? t->tObj->DriverData : NULL );
+   }
+
+   if ( t != NULL ) {
+      if ( t->memBlock ) {
+	 heap = t->heap;
+	 assert( heap != NULL );
+	 /* Releasing resident memory is counted as a texture swap. */
+	 heap->texture_swaps[0]++;
+
+	 mmFreeMem( t->memBlock );
+	 t->memBlock = NULL;
+
+	 if (t->timestamp > t->heap->timestamp)
+	    t->heap->timestamp = t->timestamp;
+
+	 heap->destroy_texture_object( heap->driverContext, t );
+	 t->heap = NULL;
+      }
+      /* Clear the GL object's back-pointer before t is freed below. */
+      if ( t->tObj != NULL ) {
+	 assert( t->tObj->DriverData == t );
+	 t->tObj->DriverData = NULL;
+      }
+
+      remove_from_list( t );
+      FREE( t );
+   }
+
+   if ( 0 ) {
+      fprintf( stderr, "[%s:%d] done freeing %p\n", __FILE__, __LINE__, t );
+   }
+}
+
+
+
+
+/**
+ * Update the local heap's representation of texture memory based on
+ * data in the SAREA.  This is done each time it is detected that some other
+ * direct rendering client has held the lock.  This pertains to both our local
+ * textures and the textures belonging to other clients.  Keep track of other
+ * clients' textures by pushing a placeholder texture onto the LRU list --
+ * these are denoted by \a tObj being \a NULL.
+ * 
+ * \param heap Heap whose state is to be updated
+ * \param offset Byte offset in the heap that has been stolen
+ * \param size Size, in bytes, of the stolen block
+ * \param in_use Non-zero if the block is in-use by another context
+ */
+
+static void driTexturesGone( driTexHeap * heap, int offset, int size, 
+			     int in_use )
+{
+   driTextureObject * t;
+   driTextureObject * tmp;
+
+
+   foreach_s ( t, tmp, & heap->texture_objects ) {
+      if ( (t->memBlock->ofs < (offset + size))
+	   && ((t->memBlock->ofs + t->memBlock->size) > offset) ) {
+	 /* It overlaps - kick it out.  If the texture object is just a
+	  * place holder, then destroy it all together.  Otherwise, mark
+	  * it as being swapped out.
+	  */
+
+	 if ( t->tObj != NULL ) {
+	    driSwapOutTextureObject( t );
+	 }
+	 else {
+	    if ( in_use && 
+		 offset == t->memBlock->ofs && size == t->memBlock->size ) {
+	       /* Matching placeholder already exists */
+	       return;
+	    } else {
+	       driDestroyTextureObject( t );
+	    }
+	 }
+      }
+   }
+
+   if ( in_use ) {
+      t = (driTextureObject *) CALLOC( heap->texture_object_size );
+      if ( t == NULL ) return;
+
+      t->memBlock = mmAllocMem( heap->memory_heap, size, 0, offset );
+      if ( t->memBlock == NULL ) {
+	 fprintf( stderr, "Couldn't alloc placeholder: heap %u sz %x ofs %x\n", heap->heapId,
+		  (int)size, (int)offset );
+	 mmDumpMemInfo( heap->memory_heap );
+	 FREE( t );   /* not on any list yet, so nothing else would ever free it */
+	 return;
+      }
+      t->heap = heap;
+      insert_at_head( & heap->texture_objects, t );
+   }
+}
+
+
+
+
+/**
+ * Called by the client on lock contention to determine whether textures have
+ * been stolen.  If another client has modified a region in which we have
+ * textures, then we need to figure out which of our textures have been
+ * removed and update our global LRU.
+ * 
+ * \param heap Texture heap to be updated
+ */
+
+void driAgeTextures( driTexHeap * heap )
+{
+   drmTextureRegionPtr list = heap->global_regions;
+   unsigned       sz = 1U << (heap->logGranularity);
+   unsigned       i, nr = 0;
+
+   /* nr counts visited entries; reaching nrRegions means the list never closed. */
+   /* Have to go right round from the back to ensure stuff ends up
+    * LRU in the local list...  Fix with a cursor pointer.
+    */
+
+   for (i = list[heap->nrRegions].prev ; 
+	i != heap->nrRegions && nr < heap->nrRegions ; 
+	i = list[i].prev, nr++) {
+      /* If switching texturing schemes, then the SAREA might not have been
+       * properly cleared, so we need to reset the global texture LRU.
+       */
+
+      if ( (i * sz) > heap->size ) {
+	 nr = heap->nrRegions;
+	 break;
+      }
+      /* A region aged past our local age is handed to driTexturesGone. */
+      if (list[i].age > heap->local_age) 
+	  driTexturesGone( heap, i * sz, sz, list[i].in_use); 
+   }
+
+   /* Loop or uninitialized heap detected.  Reset.
+    */
+
+   if (nr == heap->nrRegions) {
+      driTexturesGone( heap, 0, heap->size, 0);
+      resetGlobalLRU( heap );
+   }
+
+   if ( 0 ) {
+      printGlobalLRU( heap, __FUNCTION__ );
+      printLocalLRU( heap, __FUNCTION__ );
+   }
+   /* Record the global age this heap has now caught up with. */
+   heap->local_age = heap->global_age[0];
+}
+
+
+
+
+/**
+ * Allocate memory from a texture heap to hold a texture object.  This
+ * routine will attempt to allocate memory for the texture from the heaps
+ * specified by \c heap_array in order.  That is, first it will try to
+ * allocate from \c heap_array[0], then \c heap_array[1], and so on.
+ *
+ * \param heap_array Array of pointers to texture heaps (\c NULL entries are skipped)
+ * \param nr_heaps Number of heap pointers in \a heap_array
+ * \param t Texture object for which space is needed
+ * \return The ID of the heap from which memory was allocated, or -1 if
+ *         memory could not be allocated.
+ *
+ * \bug The replacement policy implemented by this function is horrible.
+ */
+
+
+int
+driAllocateTexture( driTexHeap * const * heap_array, unsigned nr_heaps,
+		    driTextureObject * t )
+{
+   driTexHeap       * heap;
+   driTextureObject * temp;
+   driTextureObject * cursor;
+   unsigned           id;
+
+
+   /* In case it already has texture space, initialize heap.  This also
+    * prevents GCC from issuing a warning that heap might be used
+    * uninitialized.
+    */
+
+   heap = t->heap;
+
+
+   /* Run through each of the existing heaps and try to allocate a buffer
+    * to hold the texture.
+    */
+
+   for ( id = 0 ; (t->memBlock == NULL) && (id < nr_heaps) ; id++ ) {
+      heap = heap_array[ id ];
+      if ( heap != NULL ) {
+	 t->memBlock = mmAllocMem( heap->memory_heap, t->totalSize, 
+				   heap->alignmentShift, 0 );
+      }
+   }
+
+
+   /* Kick textures out until the requested texture fits.
+    */
+
+   if ( t->memBlock == NULL ) {
+      for ( id = 0 ; (t->memBlock == NULL) && (id < nr_heaps) ; id++ ) {
+	 heap = heap_array[ id ];
+	 if ( (heap != NULL) && (t->totalSize <= heap->size) ) { 
+
+	    for ( cursor = heap->texture_objects.prev, temp = cursor->prev;
+		  cursor != &heap->texture_objects ; 
+		  cursor = temp, temp = cursor->prev ) {
+	       
+	       /* Take the LRU element.  If the texture is bound to one of
+		* the texture units, then we cannot kick it out.
+		*/
+	       if ( cursor->bound /* || cursor->reserved */ ) {
+		  continue;
+	       }
+
+	       /* If this is a placeholder, there's no need to keep it */
+	       if (cursor->tObj)
+		   driSwapOutTextureObject( cursor );
+	       else
+		   driDestroyTextureObject( cursor );
+
+	       t->memBlock = mmAllocMem( heap->memory_heap, t->totalSize, 
+					 heap->alignmentShift, 0 );
+
+	       if (t->memBlock)
+		  break;
+	    }
+	 }     /* if ( heap usable and t->totalSize <= heap->size ) ... */
+      }
+   }
+
+
+   if ( t->memBlock != NULL ) {
+      /* id and heap->heapId may or may not be the same value here.
+       */
+
+      assert( heap != NULL );
+      assert( (t->heap == NULL) || (t->heap == heap) );
+
+      t->heap = heap;
+      return heap->heapId;
+   }
+   else {
+      assert( t->heap == NULL );
+
+      fprintf( stderr, "[%s:%d] unable to allocate texture\n",
+	       __FUNCTION__, __LINE__ );
+      return -1;
+   }
+}
+
+
+
+
+
+
+/**
+ * Set where the heap's texture-swap counter lives (NULL selects the file-local dummy).
+ */
+void
+driSetTextureSwapCounterLocation( driTexHeap * heap, unsigned * counter )
+{
+   if ( counter == NULL ) counter = & dummy_swap_counter;
+   heap->texture_swaps = counter;
+}
+
+
+
+
+/**
+ * Create a new heap for texture data.
+ * 
+ * \param heap_id             Device-dependent heap identifier.  This value
+ *                            will returned by driAllocateTexture when memory
+ *                            is allocated from this heap.
+ * \param context             Device-dependent driver context.  This is
+ *                            supplied as the first parameter to the
+ *                            \c destroy_tex_obj function.
+ * \param size                Size, in bytes, of the texture region
+ * \param alignmentShift      Alignment requirement for textures.  If textures 
+ *                            must be allocated on a 4096 byte boundry, this
+ *                            would be 12.
+ * \param nr_regions          Number of regions into which this texture space
+ *                            should be partitioned
+ * \param global_regions      Array of \c drmTextureRegion structures in the SAREA
+ * \param global_age          Pointer to the global texture age in the SAREA
+ * \param swapped_objects     Pointer to the list of texture objects that are
+ *                            not in texture memory (i.e., have been swapped
+ *                            out).
+ * \param texture_object_size Size, in bytes, of a device-dependent texture
+ *                            object
+ * \param destroy_tex_obj     Function used to destroy a device-dependent
+ *                            texture object
+ *
+ * \sa driDestroyTextureHeap
+ */
+
+driTexHeap *
+driCreateTextureHeap( unsigned heap_id, void * context, unsigned size,
+		      unsigned alignmentShift, unsigned nr_regions,
+		      drmTextureRegionPtr global_regions, unsigned * global_age,
+		      driTextureObject * swapped_objects, 
+		      unsigned texture_object_size,
+		      destroy_texture_object_t * destroy_tex_obj
+		    )
+{
+   driTexHeap * heap;
+   unsigned     l;
+
+   if ( 0 )
+       fprintf( stderr, "%s( %u, %p, %u, %u, %u )\n",
+		__FUNCTION__,
+		heap_id, context, size, alignmentShift, nr_regions );
+
+   heap = (driTexHeap *) CALLOC( sizeof( driTexHeap ) );
+   if ( heap != NULL ) {
+      /* Granularity is driLog2 of the per-region share of the heap, but
+       * never less than the required alignment shift. */
+      l = driLog2( (size - 1) / nr_regions );
+      if ( l < alignmentShift ) {
+	 l = alignmentShift;
+      }
+
+      /* Only whole regions are usable, so round the size down. */
+      heap->logGranularity = l;
+      heap->size = size & ~((1L << l) - 1);
+
+      heap->memory_heap = mmInit( 0, heap->size );
+      if ( heap->memory_heap != NULL ) {
+	 heap->heapId = heap_id;
+	 heap->driverContext = context;
+	 heap->alignmentShift = alignmentShift;
+	 heap->nrRegions = nr_regions;
+	 heap->global_regions = global_regions;
+	 heap->global_age = global_age;
+	 heap->swapped_objects = swapped_objects;
+	 heap->texture_object_size = texture_object_size;
+	 heap->destroy_texture_object = destroy_tex_obj;
+
+	 /* Force global heap init: start with a local age that differs from
+	  * the age stored at *global_age, so DRI_AGE_TEXTURES sees a mismatch.
+	  */
+	 if (heap->global_age[0] == 0)
+	     heap->local_age = ~0;
+	 else
+	     heap->local_age = 0;
+	 make_empty_list( & heap->texture_objects );
+	 driSetTextureSwapCounterLocation( heap, NULL );
+      } else {
+	 FREE( heap );
+	 heap = NULL;
+      }
+   }
+
+   if ( 0 )
+       fprintf( stderr, "%s returning %p\n", __FUNCTION__, heap );
+
+   /* NULL when either the heap struct or its memory heap could not be made. */
+   return heap;
+}
+
+
+
+
+/** Destroys a texture heap
+ * 
+ * \param heap Texture heap to be destroyed
+ */
+
+void
+driDestroyTextureHeap( driTexHeap * heap )
+{
+   driTextureObject * obj;
+   driTextureObject * next_obj;
+
+   if ( heap == NULL ) return;
+
+   /* Resident objects first, then the ones on the swapped-out list. */
+   foreach_s( obj, next_obj, & heap->texture_objects ) {
+      driDestroyTextureObject( obj );
+   }
+   foreach_s( obj, next_obj, heap->swapped_objects ) {
+      driDestroyTextureObject( obj );
+   }
+
+   mmDestroy( heap->memory_heap );
+   FREE( heap );
+}
+
+
+
+
+/****************************************************************************/
+/**
+ * Determine how many texels (including all mipmap levels) would be required
+ * for a texture map whose sides are \f$2^{base\_size\_log2}\f$ texels long.
+ *
+ * \param base_size_log2 \f$log_2\f$ of the size of a side of the texture
+ * \param dimensions Number of dimensions of the texture.  Either 2 or 3.
+ * \param faces Number of faces of the texture.  Either 1 or 6 (for cube maps).
+ * \return Number of texels
+ */
+
+static unsigned
+texels_this_map_size( int base_size_log2, unsigned dimensions, unsigned faces )
+{
+   unsigned  level0_texels;
+
+   assert( (faces == 1) || (faces == 6) );
+   assert( (dimensions == 2) || (dimensions == 3) );
+
+   /* Negative sizes contribute no texels. */
+   if ( base_size_log2 < 0 ) {
+      return 0;
+   }
+
+   /* Texels in one face of the level 0 image. */
+   level0_texels = (1U << (dimensions * base_size_log2));
+
+   /* See http://www.mail-archive.com/[email protected]/msg03636.html
+    * for the full derivation of this formula.  In short, the smaller
+    * mipmap levels add up to one third of the level 0 image, so the
+    * whole chain takes 4/3 of level 0.  That amount is needed once per
+    * face.  The +2 makes the integer division by 3 round up instead of
+    * truncating.
+    */
+   return (level0_texels * 4 * faces + 2) / 3;
+}
+
+
+
+
+struct maps_per_heap {   /* capacity table for one heap, filled by fill_in_maximums */
+   unsigned  c[32];   /**< c[n]: how many textures of log2 size n fit in the heap */
+};
+
+static void
+fill_in_maximums( driTexHeap * const * heaps, unsigned nr_heaps,
+		  unsigned max_bytes_per_texel, unsigned max_size,
+		  unsigned mipmaps_at_once, unsigned dimensions,
+		  unsigned faces, struct maps_per_heap * max_textures )
+{
+   unsigned   h;
+
+   /* For every heap, work out how many textures of each power-of-two
+    * size it could hold.  A heap that is NULL holds none of any size.
+    */
+   for ( h = 0 ; h < nr_heaps ; h++ ) {
+      const driTexHeap * const cur = heaps[ h ];
+      unsigned   log2_size;
+      unsigned   mask;
+
+      if ( cur == NULL ) {
+	 (void) memset( max_textures[ h ].c, 0, 
+			sizeof( max_textures[ h ].c ) );
+	 continue;
+      }
+
+      /* Texture sizes are rounded up to a multiple of the heap's
+       * granularity; mask covers the bits below that granularity.
+       */
+      mask = (1U << cur->logGranularity) - 1;
+
+      if ( 0 ) {
+	 fprintf( stderr, "[%s:%d] heap[%u] = %u bytes, mask = 0x%08x\n",
+		  __FILE__, __LINE__,
+		  h, cur->size, mask );
+      }
+
+      for ( log2_size = max_size ; log2_size > 0 ; log2_size-- ) {
+	 unsigned   bytes;
+	 unsigned   fit;
+
+	 /* Bytes needed by a texture of this size: the texels of the
+	  * map of size log2_size, less those of the map that is
+	  * mipmaps_at_once sizes smaller, times the texel size.
+	  */
+	 bytes = texels_this_map_size( log2_size, dimensions, faces );
+	 bytes -= texels_this_map_size( log2_size - mipmaps_at_once,
+					dimensions, faces );
+	 bytes *= max_bytes_per_texel;
+	 bytes = (bytes + mask) & ~mask;
+
+	 /* Capacity is the heap size divided by the padded texture size. */
+	 fit = cur->size / bytes;
+	 max_textures[ h ].c[ log2_size ] = fit;
+
+	 if ( 0 ) {
+	    fprintf( stderr, "[%s:%d] max_textures[%u].c[%02u] "
+		     "= 0x%08x / 0x%08x "
+		     "= %u (%u)\n",
+		     __FILE__, __LINE__,
+		     h, log2_size,
+		     cur->size, bytes,
+		     cur->size / bytes,
+		     max_textures[ h ].c[ log2_size ] );
+	 }
+      }
+   }
+}
+
+
+static unsigned
+get_max_size( unsigned nr_heaps,
+	      unsigned texture_units,
+	      unsigned max_size,
+	      int all_textures_one_heap,
+	      struct maps_per_heap * max_textures )
+{
+   unsigned   h;
+   unsigned   log2_size;
+
+   /* Walk from the largest size downwards and stop at the first size
+    * for which every texture unit can have a texture bound at once.
+    * Either one heap holds texture_units textures by itself, or, when
+    * the hardware does not need them all in one heap, the heaps seen
+    * so far hold that many between them.
+    */
+   for ( log2_size = max_size ; log2_size > 0 ; log2_size-- ) {
+      unsigned   running = 0;
+
+      for ( h = 0 ; h < nr_heaps ; h++ ) {
+	 const unsigned   in_heap = max_textures[ h ].c[ log2_size ];
+	 int   enough;
+
+	 running += in_heap;
+	 if ( 0 ) {
+	    fprintf( stderr, "[%s:%d] max_textures[%u].c[%02u] = %u, "
+		     "total = %u\n", __FILE__, __LINE__, h, log2_size,
+		     max_textures[ h ].c[ log2_size ], running );
+	 }
+
+	 enough = (in_heap >= texture_units);
+	 if ( !enough && !all_textures_one_heap ) {
+	    enough = (running >= texture_units);
+	 }
+
+	 if ( enough ) {
+	    /* Report a level count: log2 of the size plus one, since
+	     * even a 1x1 maximum (log2 of 0) still has its base level.
+	     */
+	    return log2_size + 1;
+	 }
+      }
+   }
+
+   /* No size fits, which should NEVER happen.  The loop only ends with
+    * log2_size at 0, so this assert always trips when it is reached.
+    */
+   assert( log2_size != 0 );
+   return 0;
+}
+
+#define SET_MAX(f,v) \
+    do { if ( max_sizes[v] != 0 ) { limits-> f = max_sizes[v]; } } while( 0 )
+/* max_sizes[] holds level counts (log2 size + 1); a rect limit is in texels. */
+#define SET_MAX_RECT(f,v) \
+    do { if ( max_sizes[v] != 0 ) { limits-> f = 1 << (max_sizes[v] - 1); } } while( 0 )
+
+
+/**
+ * Given the amount of texture memory, the number of texture units, and the
+ * maximum size of a texel, calculate the maximum texture size the driver can
+ * advertise.
+ * 
+ * \param heaps Texture heaps for this card
+ * \param nr_heaps Number of texture heaps
+ * \param limits OpenGL constants.  MaxTextureUnits must be set.
+ * \param max_bytes_per_texel Maximum size of a single texel, in bytes
+ * \param max_2D_size \f$\log_2\f$ of the maximum 2D texture size (i.e.,
+ *     1024x1024 textures, this would be 10)
+ * \param max_3D_size \f$\log_2\f$ of the maximum 3D texture size (i.e.,
+ *     1024x1024x1024 textures, this would be 10)
+ * \param max_cube_size \f$\log_2\f$ of the maximum cube texture size (i.e.,
+ *     1024x1024 textures, this would be 10)
+ * \param max_rect_size \f$\log_2\f$ of the maximum texture rectangle size
+ *     (i.e., 1024x1024 textures, this would be 10).  This is a power-of-2
+ *     even though texture rectangles need not be a power-of-2.
+ * \param mipmaps_at_once Total number of mipmaps that can be used
+ *     at one time.  For most hardware this will be \f$\c max_size + 1\f$.
+ *     For hardware that does not support mipmapping, this will be 1.
+ * \param all_textures_one_heap True if the hardware requires that all
+ *     textures be in a single texture heap for multitexturing.
+ */
+
+void
+driCalculateMaxTextureLevels( driTexHeap * const * heaps,
+			      unsigned nr_heaps,
+			      struct gl_constants * limits,
+			      unsigned max_bytes_per_texel, 
+			      unsigned max_2D_size,
+			      unsigned max_3D_size,
+			      unsigned max_cube_size,
+			      unsigned max_rect_size,
+			      unsigned mipmaps_at_once,
+			      int all_textures_one_heap )
+{
+   /* Per-target tables, indexed 0 = 2D, 1 = 3D, 2 = cube, 3 = rectangle. */
+   const unsigned   dims[4]       = { 2, 3, 2, 2 };
+   const unsigned   face_count[4] = { 1, 1, 6, 1 };
+   unsigned         levels_at_once[4];
+   unsigned         max_sizes[4];
+   struct maps_per_heap  fits[8];
+   unsigned         kind;
+
+   max_sizes[0] = max_2D_size;
+   max_sizes[1] = max_3D_size;
+   max_sizes[2] = max_cube_size;
+   max_sizes[3] = max_rect_size;
+
+   /* Cube maps are always sized with a single mipmap level. */
+   levels_at_once[0] = mipmaps_at_once;
+   levels_at_once[1] = mipmaps_at_once;
+   levels_at_once[2] = 1;
+   levels_at_once[3] = mipmaps_at_once;
+
+   /* Two steps per target.  First count how many textures of each
+    * power-of-two size fit in each heap, then pick the largest size
+    * that can be bound to every texture unit.  A size of 0 is skipped.
+    */
+   for ( kind = 0 ; kind < 4 ; kind++ ) {
+      if ( max_sizes[ kind ] == 0 ) {
+	 continue;
+      }
+
+      fill_in_maximums( heaps, nr_heaps, max_bytes_per_texel,
+			max_sizes[ kind ], levels_at_once[ kind ],
+			dims[ kind ], face_count[ kind ],
+			fits );
+
+      max_sizes[ kind ] = get_max_size( nr_heaps,
+					   limits->MaxTextureUnits,
+					   max_sizes[ kind ],
+					   all_textures_one_heap,
+					   fits );
+   }
+
+   /* A target whose entry is 0 keeps the limit already in \a limits. */
+   SET_MAX( MaxTextureLevels,        0 );
+   SET_MAX( Max3DTextureLevels,      1 );
+   SET_MAX( MaxCubeTextureLevels,    2 );
+   SET_MAX_RECT( MaxTextureRectSize, 3 );
+}
+
+
+
+
+/**
+ * Perform initial binding of default texture objects on a per unit, per
+ * texture target basis.
+ *
+ * \param ctx Current OpenGL context
+ * \param swapped List of swapped-out textures
+ * \param targets Bit-mask of valid texture targets
+ */
+
+void driInitTextureObjects( GLcontext *ctx, driTextureObject * swapped,
+			    GLuint targets )
+{
+   const GLuint saved_unit = ctx->Texture.CurrentUnit;
+   struct gl_texture_object *obj;
+   unsigned   unit;
+
+   /* Each requested target's default object is bound, then moved to the tail of swapped. */
+   for ( unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++ ) {
+      ctx->Texture.CurrentUnit = unit;
+
+      if ( targets & DRI_TEXMGR_DO_TEXTURE_1D ) {
+	 obj = ctx->Texture.Unit[unit].Current1D;
+	 ctx->Driver.BindTexture( ctx, GL_TEXTURE_1D, obj );
+	 move_to_tail( swapped, (driTextureObject *) obj->DriverData );
+      }
+
+      if ( targets & DRI_TEXMGR_DO_TEXTURE_2D ) {
+	 obj = ctx->Texture.Unit[unit].Current2D;
+	 ctx->Driver.BindTexture( ctx, GL_TEXTURE_2D, obj );
+	 move_to_tail( swapped, (driTextureObject *) obj->DriverData );
+      }
+
+      if ( targets & DRI_TEXMGR_DO_TEXTURE_3D ) {
+	 obj = ctx->Texture.Unit[unit].Current3D;
+	 ctx->Driver.BindTexture( ctx, GL_TEXTURE_3D, obj );
+	 move_to_tail( swapped, (driTextureObject *) obj->DriverData );
+      }
+
+      if ( targets & DRI_TEXMGR_DO_TEXTURE_CUBE ) {
+	 obj = ctx->Texture.Unit[unit].CurrentCubeMap;
+	 ctx->Driver.BindTexture( ctx, GL_TEXTURE_CUBE_MAP_ARB, obj );
+	 move_to_tail( swapped, (driTextureObject *) obj->DriverData );
+      }
+
+      if ( targets & DRI_TEXMGR_DO_TEXTURE_RECT ) {
+	 obj = ctx->Texture.Unit[unit].CurrentRect;
+	 ctx->Driver.BindTexture( ctx, GL_TEXTURE_RECTANGLE_NV, obj );
+	 move_to_tail( swapped, (driTextureObject *) obj->DriverData );
+      }
+   }
+
+   ctx->Texture.CurrentUnit = saved_unit;
+}
+
+
+
+
+/**
+ * Verify that the specified texture is in the specified heap.
+ * 
+ * \param tex   Texture to be tested.
+ * \param heap  Texture memory heap to be tested.
+ * \return True if the texture is in the heap, false otherwise.
+ */
+
+static GLboolean
+check_in_heap( const driTextureObject * tex, const driTexHeap * heap )
+{
+#if 0
+   /* Disabled alternative: search the heap's own object list. */
+   driTextureObject * iter;
+
+   foreach( iter, & heap->texture_objects ) {
+      if ( iter == tex ) {
+	 break;
+      }
+   }
+   return iter == tex;
+#else
+   return heap == tex->heap;   /* compare against the texture's heap pointer */
+#endif
+}
+
+
+
+/****************************************************************************/
+/**
+ * Validate the consistency of a set of texture heaps and the swapped list.
+ * Original version by Keith Whitwell in r200/r200_sanity.c.
+ * \return GL_TRUE if all checks pass, else GL_FALSE after a note on stderr.
+ */
+GLboolean
+driValidateTextureHeaps( driTexHeap * const * texture_heaps,
+			 unsigned nr_heaps, const driTextureObject * swapped )
+{
+   driTextureObject *t;
+   unsigned  i;
+
+   for ( i = 0 ; i < nr_heaps ; i++ ) {
+      int last_end = 0;
+      unsigned textures_in_heap = 0;
+      unsigned blocks_in_mempool = 0;
+      const driTexHeap * heap = texture_heaps[i];
+      const memHeap_t * p = heap->memory_heap;
+
+      /* Check each texture object has a MemBlock, and is linked into
+       * the correct heap.  
+       *
+       * Check the texobj size (recalculate???) fits within the
+       * MemBlock.  (The texobj base address is not checked against
+       * the MemBlock range.)
+       *
+       * Count the number of texobj's using this heap.
+       */
+
+      foreach ( t, &heap->texture_objects ) {
+	 if ( !check_in_heap( t, heap ) || (t->memBlock == NULL) ) {
+	    fprintf( stderr, "%s memory block for texture object @ %p not "
+		     "found in heap #%d\n",
+		     __FUNCTION__, t, i );
+	    return GL_FALSE;
+	 }
+
+	 /* memBlock is known to be non-NULL from here on. */
+	 if ( t->totalSize > t->memBlock->size ) {
+	    fprintf( stderr, "%s: Memory block for texture object @ %p is "
+		     "only %u bytes, but %u are required\n",
+		     __FUNCTION__, t, t->memBlock->size, t->totalSize );
+	    return GL_FALSE;
+	 }
+
+	 textures_in_heap++;
+      }
+
+      /* Validate the contents of the heap:
+       *   - Ordering
+       *   - Overlaps
+       *   - Bounds
+       */
+
+      while ( p != NULL ) {
+	 if (p->reserved) {
+	    fprintf( stderr, "%s: Block (%08x,%x), is reserved?!\n",
+		     __FUNCTION__, p->ofs, p->size );
+	    return GL_FALSE;
+	 }
+
+	 if (p->ofs != last_end) {
+	    fprintf( stderr, "%s: blocks_in_mempool = %d, last_end = %d, p->ofs = %d\n",
+		     __FUNCTION__, blocks_in_mempool, last_end, p->ofs );
+	    return GL_FALSE;
+	 }
+
+	 if (!p->reserved && !p->free) {
+	    blocks_in_mempool++;
+	 }
+
+	 last_end = p->ofs + p->size;
+	 p = p->next;
+      }
+
+      if (textures_in_heap != blocks_in_mempool) {
+	 fprintf( stderr, "%s: Different number of textures objects (%u) and "
+		  "inuse memory blocks (%u)\n", 
+		  __FUNCTION__, textures_in_heap, blocks_in_mempool );
+	 return GL_FALSE;
+      }
+
+#if 0
+      fprintf( stderr, "%s: textures_in_heap = %u\n", 
+	       __FUNCTION__, textures_in_heap );
+#endif
+   }
+
+
+   /* Check swapped texobj's have zero memblocks
+    */
+   i = 0;
+   foreach ( t, swapped ) {
+      if ( t->memBlock != NULL ) {
+	 fprintf( stderr, "%s: Swapped texobj %p has non-NULL memblock %p\n",
+		  __FUNCTION__, t, t->memBlock );
+	 return GL_FALSE;
+      }
+      i++;
+   }
+
+#if 0
+   fprintf( stderr, "%s: swapped texture count = %u\n", __FUNCTION__, i );
+#endif
+
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/common/texmem.h b/src/mesa/drivers/dri/common/texmem.h
new file mode 100644
index 00000000000..7199704612a
--- /dev/null
+++ b/src/mesa/drivers/dri/common/texmem.h
@@ -0,0 +1,293 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <[email protected]>
+ *    Keith Whitwell <[email protected]>
+ *    Kevin E. Martin <[email protected]>
+ *    Gareth Hughes <[email protected]>
+ */
+/* $XFree86:$ */
+
+/** \file texmem.h
+ * Public interface to the DRI texture memory management routines.
+ * 
+ * \sa texmem.c
+ */
+
+#ifndef DRI_TEXMEM_H
+#define DRI_TEXMEM_H
+
+#include "mtypes.h"
+#include "mm.h"
+#include "xf86drm.h"
+
+struct dri_tex_heap;
+typedef struct dri_tex_heap driTexHeap;
+
+struct dri_texture_object;
+typedef struct dri_texture_object driTextureObject;
+
+
+/**
+ * Base texture object type.  Each driver will extend this type with its own
+ * private data members.
+ */
+
+struct dri_texture_object {
+	struct dri_texture_object * next;	/**< List link; NOTE(review): presumably maintained by the list macros (foreach, move_to_tail) - confirm */
+	struct dri_texture_object * prev;	/**< List link in the other direction; same caveat as \c next */
+
+	driTexHeap * heap;		/**< Texture heap currently stored in */
+	struct gl_texture_object * tObj;/**< Pointer to Mesa texture object
+					 * If NULL, this texture object is a
+					 * "placeholder" object representing
+					 * texture memory in use by another context.
+					 * A placeholder should have a heap and a memBlock.
+					 */
+	PMemBlock   memBlock;		/**< Memory block containing texture */
+	unsigned    bound;		/**< Texture unit currently bound to */
+
+	unsigned    totalSize;		/**< Total size of the texture,
+					 * including all mipmap levels 
+					 */
+
+	unsigned    dirty_images[6];	/**< Flags for whether or not images
+					 * need to be uploaded to local or
+					 * AGP texture space.  One flag set
+					 * for each cube face for cubic
+					 * textures.  Bit zero corresponds to
+					 * the base-level, which may or may
+					 * not be the level zero mipmap.
+					 */
+
+        unsigned    timestamp;	        /**< Timestamp used to
+					 * synchronize with 3d engine
+					 * in hardware where textures
+					 * are uploaded directly to
+					 * the framebuffer.  
+					 */
+
+        unsigned    firstLevel;         /**< Image in \c tObj->Image that
+					 * corresponds to the base-level of
+					 * this texture object.
+					 */
+
+        unsigned    lastLevel;          /**< Last image in \c tObj->Image used
+					 * by the current LOD settings of this
+					 * texture object.  This value must be
+					 * greater than or equal to
+					 * \c firstLevel.
+					 */
+};
+
+
+typedef void (destroy_texture_object_t)( void * driverContext,
+				        driTextureObject * t );
+
+/**
+ * Client-private representation of texture memory state.
+ *
+ * Clients will place one or more of these structs in their driver
+ * context struct to manage one or more global texture heaps.
+ */
+
+struct dri_tex_heap {
+
+	/** Client-supplied heap identifier 
+	 */
+	unsigned heapId;	
+
+	/** Pointer to the client's private context 
+	 */
+	void *driverContext;
+
+	/** Total size of the heap, in bytes
+	 */
+	unsigned size;
+
+	/** \brief \f$log_2\f$ of size of single heap region
+	 *
+	 * Each context takes memory from the global texture heap in
+	 * \f$2^{logGranularity}\f$ byte blocks.  The value of
+	 * \a logGranularity is based on the amount of memory represented
+	 * by the heap and the maximum number of regions in the SAREA.  Given
+	 * \a b bytes of texture memory and \a n regions in the SAREA,
+	 * \a logGranularity will be \f$\lfloor\log_2( b / n )\rfloor\f$.
+	 */
+	unsigned logGranularity;
+
+	/** \brief Required alignment of allocations in this heap
+	 * 
+	 * The alignment shift is supplied to \a mmAllocMem when memory is
+	 * allocated from this heap.  The value of \a alignmentShift will
+	 * typically reflect some requirement of the hardware.  It is
+	 * otherwise unrelated to \a logGranularity, except that
+	 * driCreateTextureHeap uses it as a lower bound for that value.
+	 * \a alignmentShift is a per-context value.
+	 * \sa mmAllocMem
+	 */
+	unsigned alignmentShift;
+
+	/** Number of elements in global list (the SAREA).
+	 */
+	unsigned nrRegions;	 
+
+	/** Pointer to SAREA \a driTexRegion array
+	 */
+	drmTextureRegionPtr global_regions;
+
+	/** Pointer to the texture state age (generation number) in the SAREA
+	 */
+	unsigned     * global_age;
+
+	/** Local age (generation number) of texture state
+	 */
+	unsigned local_age;
+
+	/** Memory heap used to manage texture memory represented by
+	 * this texture heap.
+	 */
+	memHeap_t    * memory_heap;
+
+	/** List of objects that we currently believe to be in texture
+	 * memory.
+	 */
+	driTextureObject     texture_objects;
+    
+	/** Pointer to the list of texture objects that are not in
+	 * texture memory.
+	 */
+	driTextureObject   * swapped_objects;
+
+	/** Size of the driver-specific texture object.
+	 */
+	unsigned       texture_object_size;
+
+
+	/**
+	 * \brief Function to destroy driver-specific texture object data.
+	 * 
+	 * This function is supplied by the driver so that the texture manager
+	 * can release all resources associated with a texture object.  This
+	 * function should only release driver-specific data.  That is,
+	 * \a driDestroyTextureObject will release the texture memory
+	 * associated with the texture object, it will release the memory
+	 * for the texture object itself, and it will unlink the texture
+	 * object from the texture object lists.
+	 *
+	 * \param driverContext Pointer to the driver supplied context
+	 * \param t Texture object that is to be destroyed
+	 * \sa driDestroyTextureObject
+	 */
+
+	destroy_texture_object_t * destroy_texture_object;
+
+
+	/** Where the texture-swap counter is stored (see
+	 * driSetTextureSwapCounterLocation). */
+	unsigned * texture_swaps;
+
+        /**
+	 * Timestamp used to synchronize with 3d engine in hardware
+	 * where textures are uploaded directly to the
+	 * framebuffer.  
+	 */
+        unsigned timestamp;
+};
+
+
+
+
+/**
+ * Called by the client on lock contention to determine whether textures have
+ * been stolen.  If another client has modified a region in which we have
+ * textures, then we need to figure out which of our textures have been
+ * removed and update our global LRU.
+ * 
+ * \param heap Texture heap to be updated
+ * \hideinitializer
+ */
+
+#define DRI_AGE_TEXTURES( heap )				\
+   do {								\
+       if ( ((heap) != NULL)					\
+	    && (*(heap)->global_age != (heap)->local_age) )	\
+	   driAgeTextures( heap );				\
+   } while( 0 )
+
+
+
+
+/* This should be called whenever there has been contention on the hardware
+ * lock.  driAgeTextures should not be called directly.  Instead, clients
+ * should use DRI_AGE_TEXTURES, above.
+ */
+
+void driAgeTextures( driTexHeap * heap );
+
+void driUpdateTextureLRU( driTextureObject * t );
+void driSwapOutTextureObject( driTextureObject * t );
+void driDestroyTextureObject( driTextureObject * t );
+int driAllocateTexture( driTexHeap * const * heap_array, unsigned nr_heaps,
+    driTextureObject * t );
+
+GLboolean driIsTextureResident( GLcontext * ctx, 
+    struct gl_texture_object * texObj );
+
+driTexHeap * driCreateTextureHeap( unsigned heap_id, void * context,
+    unsigned size, unsigned alignmentShift, unsigned nr_regions,
+    drmTextureRegionPtr global_regions, unsigned * global_age,
+    driTextureObject * swapped_objects, unsigned texture_object_size,
+    destroy_texture_object_t * destroy_tex_obj );
+void driDestroyTextureHeap( driTexHeap * heap );
+
+void
+driCalculateMaxTextureLevels( driTexHeap * const * heaps,
+			      unsigned nr_heaps,
+			      struct gl_constants * limits,
+			      unsigned max_bytes_per_texel, 
+			      unsigned max_2D_size,
+			      unsigned max_3D_size,
+			      unsigned max_cube_size,
+			      unsigned max_rect_size,
+			      unsigned mipmaps_at_once,
+			      int all_textures_one_heap );
+
+void
+driSetTextureSwapCounterLocation( driTexHeap * heap, unsigned * counter );
+
+#define DRI_TEXMGR_DO_TEXTURE_1D    0x0001
+#define DRI_TEXMGR_DO_TEXTURE_2D    0x0002
+#define DRI_TEXMGR_DO_TEXTURE_3D    0x0004
+#define DRI_TEXMGR_DO_TEXTURE_CUBE  0x0008
+#define DRI_TEXMGR_DO_TEXTURE_RECT  0x0010
+
+void driInitTextureObjects( GLcontext *ctx, driTextureObject * swapped,
+			    GLuint targets );
+
+GLboolean driValidateTextureHeaps( driTexHeap * const * texture_heaps,
+    unsigned nr_heaps, const driTextureObject * swapped );
+
+#endif /* DRI_TEXMEM_H */
diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c
new file mode 100644
index 00000000000..bc83fef1367
--- /dev/null
+++ b/src/mesa/drivers/dri/common/utils.c
@@ -0,0 +1,186 @@
+/*
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <[email protected]>
+ */
+/* $XFree86:$ */
+
+#include <string.h>
+#include <stdlib.h>
+#include "mtypes.h"
+#include "extensions.h"
+#include "utils.h"
+
+#if defined(USE_X86_ASM)
+#include "X86/common_x86_asm.h"
+#endif
+
+unsigned
+driParseDebugString( const char * debug, 
+		     const struct dri_debug_control * control  )
+{
+   const struct dri_debug_control * entry;
+   unsigned   flags = 0;
+
+   if ( debug == NULL ) {
+      return 0;
+   }
+
+   /* The table ends at the first entry whose string is NULL; a flag is
+    * set when its string occurs anywhere inside \a debug. */
+   for ( entry = control ; entry->string != NULL ; entry++ ) {
+      if ( strstr( debug, entry->string ) != NULL ) {
+	 flags |= entry->flag;
+      }
+   }
+   return flags;
+}
+
+
+
+
+unsigned
+driGetRendererString( char * buffer, const char * hardware_name,
+		      const char * driver_date, GLuint agp_mode )
+{
+#ifdef USE_X86_ASM
+   const char * x86_str = "";
+   const char * mmx_str = "";
+   const char * tdnow_str = "";
+   const char * sse_str = "";
+#endif
+   unsigned   len;
+
+   /* The string always begins with the hardware name and driver date.
+    * The writes are unbounded, so \a buffer has to be large enough for
+    * everything appended below.
+    */
+   len = sprintf( buffer, "Mesa DRI %s %s", hardware_name, driver_date );
+
+   /* AGP information is appended only for the modes 1, 2, 4 and 8;
+    * any other value adds nothing.
+    */
+   if ( (agp_mode == 1) || (agp_mode == 2)
+	|| (agp_mode == 4) || (agp_mode == 8) ) {
+      len += sprintf( & buffer[ len ], " AGP %ux", agp_mode );
+   }
+
+   /* CPU information.  Builds with USE_X86_ASM append a feature list
+    * chosen by the cpu_has_* tests, one entry per assembly back end
+    * that was compiled in.  Builds with USE_SPARC_ASM append a fixed tag.
+    */
+#ifdef USE_X86_ASM
+   if ( _mesa_x86_cpu_features ) {
+      x86_str = " x86";
+   }
+# ifdef USE_MMX_ASM
+   if ( cpu_has_mmx ) {
+      mmx_str = (cpu_has_mmxext) ? "/MMX+" : "/MMX";
+   }
+# endif
+# ifdef USE_3DNOW_ASM
+   if ( cpu_has_3dnow ) {
+      tdnow_str = (cpu_has_3dnowext) ? "/3DNow!+" : "/3DNow!";
+   }
+# endif
+# ifdef USE_SSE_ASM
+   if ( cpu_has_xmm ) {
+      sse_str = (cpu_has_xmm2) ? "/SSE2" : "/SSE";
+   }
+# endif
+
+   len += sprintf( & buffer[ len ], "%s%s%s%s", 
+		   x86_str, mmx_str, tdnow_str, sse_str );
+
+#elif defined(USE_SPARC_ASM)
+
+   len += sprintf( & buffer[ len ], " Sparc" );
+
+#endif
+
+   /* Number of characters written, not counting the terminating NUL. */
+   return len;
+}
+
+
+
+
+void driInitExtensions( GLcontext * ctx,
+			const char * const extensions_to_enable[],
+			GLboolean  enable_imaging )
+{
+   const char * const * name;
+
+   if ( enable_imaging ) {
+      _mesa_enable_imaging_extensions( ctx );
+   }
+   /* The list of names ends at the first NULL entry. */
+   for ( name = extensions_to_enable ; *name != NULL ; name++ ) {
+      _mesa_enable_extension( ctx, *name );
+   }
+}
+
+
+
+
+GLboolean
+driCheckDriDdxDrmVersions(__DRIscreenPrivate *sPriv,
+			  const char * driver_name,
+			  int dri_major, int dri_minor,
+			  int ddx_major, int ddx_minor,
+			  int drm_major, int drm_minor)
+{
+   static const char format[] = "%s DRI driver expected %s version %d.%d.x "
+       "but got version %d.%d.%d";
+   int major, minor, patch;
+
+#ifndef _SOLO
+   /* Check the DRI version (skipped if the query returns false) */
+   if (XF86DRIQueryVersion(sPriv->display, &major, &minor, &patch)) {
+      if (major != dri_major || minor < dri_minor) {
+	 __driUtilMessage(format, driver_name, "DRI", dri_major, dri_minor,
+			  major, minor, patch);
+	 return GL_FALSE;
+      }
+   }
+#else
+   (void)major;(void)minor;(void)patch;
+#endif
+   /* Check that the DDX driver version is compatible: the major number
+    * must match exactly and the minor number is a minimum. */
+   if (sPriv->ddxMajor != ddx_major || sPriv->ddxMinor < ddx_minor) {
+      __driUtilMessage(format, driver_name, "DDX", ddx_major, ddx_minor,
+		       sPriv->ddxMajor, sPriv->ddxMinor, sPriv->ddxPatch);
+      return GL_FALSE;
+   }
+
+   /* Check that the DRM driver version is compatible (same rule) */
+   if (sPriv->drmMajor != drm_major || sPriv->drmMinor < drm_minor) {
+      __driUtilMessage(format, driver_name, "DRM", drm_major, drm_minor,
+		       sPriv->drmMajor, sPriv->drmMinor, sPriv->drmPatch);
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/common/utils.h b/src/mesa/drivers/dri/common/utils.h
new file mode 100644
index 00000000000..d6506c5b88c
--- /dev/null
+++ b/src/mesa/drivers/dri/common/utils.h
@@ -0,0 +1,54 @@
+/*
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <[email protected]>
+ */
+/* $XFree86:$ */
+
+#ifndef DRI_DEBUG_H
+#define DRI_DEBUG_H
+
+#include "context.h"
+#include "dri_util.h"
+
+struct dri_debug_control   /* string/flag pair; driParseDebugString() takes a pointer to these */
+{
+    const char * string;   /* presumably the name looked for in the debug string -- TODO confirm in driParseDebugString() */
+    unsigned     flag;     /* presumably the bit(s) reported when that name is found -- TODO confirm */
+};
+
+extern unsigned driParseDebugString( const char * debug,
+    const struct dri_debug_control * control );
+
+extern unsigned driGetRendererString( char * buffer,
+    const char * hardware_name, const char * driver_date, GLuint agp_mode );
+
+extern void driInitExtensions( GLcontext * ctx, 
+    const char * const card_extensions[], GLboolean enable_imaging );
+
+extern GLboolean driCheckDriDdxDrmVersions( __DRIscreenPrivate *sPriv,
+    const char * driver_name, int dri_major, int dri_minor,
+    int ddx_major, int ddx_minor, int drm_major, int drm_minor );
+
+#endif /* DRI_DEBUG_H */
diff --git a/src/mesa/drivers/dri/common/vblank.c b/src/mesa/drivers/dri/common/vblank.c
new file mode 100644
index 00000000000..e91dd65260c
--- /dev/null
+++ b/src/mesa/drivers/dri/common/vblank.c
@@ -0,0 +1,325 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <[email protected]>
+ */
+/* $XFree86:$ */
+
+#include "glheader.h"
+#include "xf86drm.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "dd.h"
+#include "vblank.h"
+
+
+/****************************************************************************/
+/**
+ * Get the current MSC refresh counter.
+ *
+ * Stores the 64-bit count of vertical refreshes since some (arbitrary)
+ * point in time in \c count.  Unless the value wraps around, which it
+ * may, it will never decrease.
+ *
+ * \warning This function is called from \c glXGetVideoSyncSGI, which expects
+ * a \c count of type \c unsigned (32-bit), and \c glXGetSyncValuesOML, which 
+ * expects a \c count of type \c int64_t (signed 64-bit).  The kernel ioctl 
+ * currently always returns a \c sequence of type \c unsigned.
+ *
+ * \param priv   Pointer to the DRI screen private struct.
+ * \param count  Storage to hold MSC counter.
+ * \return       Zero is returned on success.  A negative errno value
+ *               is returned on failure.
+ */
+int driGetMSC32( __DRIscreenPrivate * priv, int64_t * count )
+{
+   drmVBlank query;
+   int status;
+
+   /* A relative wait of zero refreshes waits for nothing; it is issued
+    * only to read back the current refresh count. */
+   query.request.sequence = 0;
+   query.request.type = DRM_VBLANK_RELATIVE;
+
+   status = drmWaitVBlank( priv->fd, &query );
+   *count = (int64_t)query.reply.sequence;   /* stored whatever the status */
+
+   return status;
+}
+
+
+/****************************************************************************/
+/**
+ * Wait for a specified refresh count.  This implements most of the
+ * functionality of \c glXWaitForMscOML from the GLX_OML_sync_control spec.
+ * Waits for the \c target_msc refresh.  If that has already passed, it
+ * waits until \f$(MSC \bmod divisor)\f$ is equal to \c remainder.  If 
+ * \c target_msc is 0, use the behavior of glXWaitVideoSyncSGI(), which
+ * omits the initial check against a target MSC value.
+ * 
+ * This function is actually something of a hack.  The problem is that, at
+ * the time of this writing, none of the existing DRM modules support an
+ * ioctl that returns a 64-bit count (at least not on 32-bit platforms).
+ * However, this function exists to support a GLX function that requires
+ * the use of 64-bit counts.  As such, there is a little bit of ugly
+ * hackery at the end of this function to make the 32-bit count act like
+ * a 64-bit count.  There are still some cases where this will break, but
+ * I believe it catches the most common cases.
+ *
+ * The real solution is to provide an ioctl that uses a 64-bit count.
+ *
+ * \param priv        Pointer to the DRI drawable private; the DRM file
+ *                    descriptor of its screen is used for the wait.
+ * \param target_msc  Desired refresh count to wait for.  A value of 0
+ *                    means to use the glXWaitVideoSyncSGI() behavior.
+ * \param divisor     MSC divisor if \c target_msc is already reached.
+ * \param remainder   Desired MSC remainder if \c target_msc is already
+ *                    reached.
+ * \param msc         Buffer to hold MSC when done waiting.
+ *
+ * \return            Zero on success, else \c GLX_BAD_CONTEXT (-1 under \c _SOLO).
+ */
+
+int driWaitForMSC32( __DRIdrawablePrivate *priv,
+		     int64_t target_msc, int64_t divisor, int64_t remainder,
+		     int64_t * msc )
+{
+   drmVBlank vbl;
+
+
+   if ( divisor != 0 ) {
+      unsigned int target = (unsigned int)target_msc;   /* truncated target */
+      unsigned int next = target;   /* sequence requested on the next pass */
+      unsigned int r;               /* latest reply sequence % divisor */
+      int dont_wait = (target_msc == 0);
+
+      do {
+         /* dont_wait means we're using the glXWaitVideoSyncSGI() behavior.
+          * The first time around, just get the current count and proceed
+          * to the test for (MSC % divisor) == remainder.
+          */
+         vbl.request.type = dont_wait ? DRM_VBLANK_RELATIVE :
+                                        DRM_VBLANK_ABSOLUTE;
+         vbl.request.sequence = next;
+
+	 if ( drmWaitVBlank( priv->driScreenPriv->fd, &vbl ) != 0 ) {
+	    /* FIXME: This doesn't seem like the right thing to return here.
+	     */
+#ifndef _SOLO
+	    return GLX_BAD_CONTEXT;
+#else
+	    return -1;
+#endif
+	 }
+
+         dont_wait = 0;   /* every later pass waits for an absolute sequence */
+         if (target_msc != 0 && vbl.reply.sequence == target)
+            break;   /* woke exactly on the requested target: done */
+
+         /* Not done yet: aim for a refresh where (MSC % divisor) == remainder.
+          * (MSC - (MSC % divisor) + remainder) is the candidate nearest the
+          * current count; if it is not in the future, divisor is added to
+          * get the following one.
+          * NOTE(review): divisor is cast to unsigned int, so a non-zero
+          * divisor whose truncated value is 0 would divide by zero below.
+          */
+         r = (vbl.reply.sequence % (unsigned int)divisor);
+         next = (vbl.reply.sequence - r + (unsigned int)remainder);
+         if (next <= vbl.reply.sequence) next += (unsigned int)divisor;
+
+      } while ( r != (unsigned int)remainder );
+   }
+   else {
+      /* If the \c divisor is zero, just wait until the MSC is greater
+       * than or equal to \c target_msc.
+       */
+
+      vbl.request.type = DRM_VBLANK_ABSOLUTE;
+      vbl.request.sequence = target_msc;
+
+      if ( drmWaitVBlank( priv->driScreenPriv->fd, &vbl ) != 0 ) {
+	 /* FIXME: This doesn't seem like the right thing to return here.
+	  */
+#ifndef _SOLO
+	    return GLX_BAD_CONTEXT;
+#else
+	    return -1;
+#endif
+      }
+   }
+
+   *msc  = (target_msc & 0xffffffff00000000LL);   /* upper half comes from the target */
+   *msc |= vbl.reply.sequence;                    /* lower half comes from the reply */
+   if ( *msc < target_msc ) {                     /* below target: treat the low half as wrapped */
+      *msc += 0x0000000100000000LL;
+   }
+
+   return 0;
+}
+
+
+/****************************************************************************/
+/**
+ * Gets a set of default vertical-blank-wait flags based on the internal GLX
+ * API version and several environment variables.
+ */
+
+GLuint driGetDefaultVBlankFlags( void )
+{
+   /* INTERVAL depends only on the GLX API version comparison. */
+   const GLuint api_bit = (driCompareGLXAPIVersion( 20030317 ) < 0)
+       ? 0 : VBLANK_FLAG_INTERVAL;
+
+   /* For the environment switches only presence matters, not the value. */
+   const GLuint sync_bit = (getenv("LIBGL_SYNC_REFRESH") == NULL)
+       ? 0 : VBLANK_FLAG_SYNC;
+   const GLuint throttle_bit = (getenv("LIBGL_THROTTLE_REFRESH") == NULL)
+       ? 0 : VBLANK_FLAG_THROTTLE;
+
+   return api_bit | sync_bit | throttle_bit;
+}
+
+
+/****************************************************************************/
+/**
+ * Wrapper to call \c drmWaitVBlank.  The main purpose of this function is to
+ * wrap the error message logging.  The error message should only be logged
+ * the first time the \c drmWaitVBlank fails.  If \c drmWaitVBlank is
+ * successful, \c vbl_seq will be set to the sequence value in the reply.
+ *
+ * \param vbl      Pointer to drmVBlank packet describing how to wait.
+ * \param vbl_seq  Location to store the current refresh counter.
+ * \param fd       File descriptor used to call into the DRM.
+ * \return         Zero on success or -1 on failure.
+ */
+
+static int do_wait( drmVBlank * vbl, GLuint * vbl_seq, int fd )
+{
+   /* Latch: the diagnostic is printed for the first failure only. */
+   static GLboolean first_time = GL_TRUE;
+   int   ret;
+
+   ret = drmWaitVBlank( fd, vbl );
+   if ( ret != 0 ) {
+      if ( first_time ) {
+	 fprintf(stderr,
+		 "%s: drmWaitVBlank returned %d, IRQs don't seem to be"
+		 " working correctly.\nTry running with LIBGL_THROTTLE_REFRESH"
+		 " and LIBGL_SYNC_REFRESH unset.\n", __FUNCTION__, ret);
+	 first_time = GL_FALSE;
+      }
+
+      return -1;
+   }
+
+   /* Success: hand back the sequence number from the reply. */
+   *vbl_seq = vbl->reply.sequence;
+   return 0;
+}
+
+
+/****************************************************************************/
+/**
+ * Waits for the vertical blank for use with glXSwapBuffers.
+ * 
+ * \param vbl_seq  Vertical blank sequence number (MSC) after the last buffer
+ *                 swap.  Updated after this wait.
+ * \param flags    \c VBLANK_FLAG bits that control how long to wait.
+ * \param missed_deadline  Set to \c GL_TRUE if the MSC after waiting is later
+ *                 than the "target" based on \c flags.  The idea is that if
+ *                 \c missed_deadline is set, then the application is not 
+ *                 achieving its desired framerate.
+ * \return         Zero on success, -1 on error.
+ */
+
+int
+driWaitForVBlank( const  __DRIdrawablePrivate *priv, GLuint * vbl_seq,
+		  GLuint flags, GLboolean * missed_deadline )
+{
+   drmVBlank vbl;
+   unsigned   original_seq;   /* *vbl_seq as passed in by the caller */
+   unsigned   deadline;       /* last sequence that still counts as on time */
+   unsigned   interval;       /* refreshes to wait beyond original_seq */
+
+   /* With VBLANK_FLAG_NO_IRQ set: succeed at once, *vbl_seq is left untouched. */
+   *missed_deadline = GL_FALSE;
+   if ( (flags & VBLANK_FLAG_NO_IRQ) != 0 ) {
+      return 0;
+   }
+
+
+   /* VBLANK_FLAG_SYNC means to wait for at least one vertical blank.  If
+    * that flag is not set, do a fake wait for zero vertical blanking
+    * periods so that we can get the current MSC.
+    *
+    * VBLANK_FLAG_INTERVAL and VBLANK_FLAG_THROTTLE mean to wait for at
+    * least one vertical blank since the last wait.  Since do_wait modifies
+    * vbl_seq, we have to save the original value of vbl_seq for the
+    * VBLANK_FLAG_INTERVAL / VBLANK_FLAG_THROTTLE calculation later.
+    */
+
+   original_seq = *vbl_seq;
+
+   vbl.request.sequence = ((flags & VBLANK_FLAG_SYNC) != 0) ? 1 : 0;
+   vbl.request.type = DRM_VBLANK_RELATIVE;
+      
+   if ( do_wait( & vbl, vbl_seq, priv->driScreenPriv->fd ) != 0 ) {
+      return -1;
+   }
+
+   /* The optional second wait below targets an absolute sequence number. */
+   vbl.request.type = DRM_VBLANK_ABSOLUTE;
+
+   if ( (flags & VBLANK_FLAG_INTERVAL) != 0 ) {
+#ifndef _SOLO
+      interval = priv->pdraw->swap_interval;
+#else
+      interval = 0;   /* no swap_interval is read in _SOLO builds */
+#endif
+   }
+   else if ( (flags & VBLANK_FLAG_THROTTLE) != 0 ) {
+      interval = 1;   /* only reached when VBLANK_FLAG_INTERVAL is clear */
+   }
+   else {
+      interval = 0;
+   }
+
+
+   /* Wait again only while *vbl_seq is below original_seq + interval.  With
+    * a zero interval the deadline is one vertical blank after original_seq.
+    */
+
+   vbl.request.sequence = original_seq + interval;
+   if ( *vbl_seq < vbl.request.sequence ) {
+      if ( do_wait( & vbl, vbl_seq, priv->driScreenPriv->fd ) != 0 ) {
+	 return -1;
+      }
+   }
+   /* NOTE(review): plain unsigned comparisons; sequence wrap-around is not handled here. */
+   deadline = original_seq + ((interval == 0) ? 1 : interval);
+   *missed_deadline = ( *vbl_seq > deadline );
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/common/vblank.h b/src/mesa/drivers/dri/common/vblank.h
new file mode 100644
index 00000000000..3dab4ead3c2
--- /dev/null
+++ b/src/mesa/drivers/dri/common/vblank.h
@@ -0,0 +1,62 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <[email protected]>
+ */
+/* $XFree86:$ */
+
+#ifndef DRI_VBLANK_H
+#define DRI_VBLANK_H
+
+#include "context.h"
+#include "dri_util.h"
+
+#define VBLANK_FLAG_INTERVAL  (1U << 0)  /* Respect the swap_interval setting
+					  * of the drawable. */
+#define VBLANK_FLAG_THROTTLE  (1U << 1)  /* Wait 1 refresh since last call;
+					  * ignored if INTERVAL is set. */
+#define VBLANK_FLAG_SYNC      (1U << 2)  /* Sync to the next refresh.
+					  */
+#define VBLANK_FLAG_NO_IRQ    (1U << 7)  /* DRM has no IRQ to wait on;
+					  * driWaitForVBlank returns at once. */
+
+extern int driGetMSC32( __DRIscreenPrivate * priv, int64_t * count );
+extern int driWaitForMSC32( __DRIdrawablePrivate *priv,
+    int64_t target_msc, int64_t divisor, int64_t remainder, int64_t * msc );
+extern GLuint driGetDefaultVBlankFlags( void );
+extern int driWaitForVBlank( const __DRIdrawablePrivate *priv,
+    GLuint * vbl_seq, GLuint flags, GLboolean * missed_deadline );
+
+#undef usleep
+#include <unistd.h>  /* for usleep() */
+#include <sched.h>   /* for sched_yield() */
+#define DO_USLEEP(nr)							\
+   do { /* sleep nr microseconds, then yield; "if (0)" is a debug trace */ \
+      if (0) fprintf(stderr, "%s: usleep for %u\n", __FUNCTION__, nr );	\
+      if (1) usleep( nr );						\
+      sched_yield();							\
+   } while( 0 )
+
+#endif /* DRI_VBLANK_H */