92 files changed, 2230 insertions, 1006 deletions
diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile
index 9f4bd1699f9..beaf9a4b129 100644
--- a/src/mesa/drivers/dri/i915/Makefile
+++ b/src/mesa/drivers/dri/i915/Makefile
@@ -19,6 +19,7 @@ DRIVER_SOURCES = \
 	intel_batchbuffer.c \
 	intel_clear.c \
 	intel_extensions.c \
+	intel_generatemipmap.c \
 	intel_mipmap_tree.c \
 	intel_tex_layout.c \
 	intel_tex_image.c \
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index 1f9f363df92..367d2a3b648 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -73,7 +73,7 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state)
          p->params_uptodate = 0;
    }
 
-   if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM))
+   if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))
       i915_update_fog(ctx);
 }
 
diff --git a/src/mesa/drivers/dri/i915/intel_generatemipmap.c b/src/mesa/drivers/dri/i915/intel_generatemipmap.c
new file mode 120000
index 00000000000..4c6b37ada01
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_generatemipmap.c
@@ -0,0 +1 @@
+../intel/intel_generatemipmap.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 2934414d99a..9712c387254 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -14,6 +14,7 @@ DRIVER_SOURCES = \
 	intel_decode.c \
 	intel_extensions.c \
 	intel_fbo.c \
+	intel_generatemipmap.c \
 	intel_mipmap_tree.c \
 	intel_regions.c \
 	intel_screen.c \
@@ -69,6 +70,7 @@ DRIVER_SOURCES = \
 	brw_vs_constval.c \
 	brw_vs_emit.c \
 	brw_vs_state.c \
+	brw_vs_surface_state.c \
 	brw_vtbl.c \
 	brw_wm.c \
 	brw_wm_debug.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index aef2ff5f86f..873fc8ffff6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -131,6 +131,7 @@ struct brw_context;
 #define BRW_NEW_WM_INPUT_DIMENSIONS     0x100
 #define BRW_NEW_INPUT_VARYING           0x200
 #define BRW_NEW_PSP                     0x800
+#define BRW_NEW_WM_SURFACES		0x1000
 #define BRW_NEW_FENCE                   0x2000
 #define BRW_NEW_INDICES			0x4000
 #define BRW_NEW_VERTICES		0x8000
@@ -245,6 +246,9 @@ struct brw_vs_ouput_sizes {
 };
 
 
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
 /** Number of texture sampler units */
 #define BRW_MAX_TEX_UNIT 16
 
@@ -450,8 +454,6 @@ struct brw_context
 
    struct {
       struct brw_state_flags dirty;
-      struct brw_tracked_state **atoms;
-      GLuint nr_atoms;
 
       GLuint nr_color_regions;
       struct intel_region *color_regions[MAX_DRAW_BUFFERS];
@@ -471,7 +473,8 @@ struct brw_context
       int validated_bo_count;
    } state;
 
-   struct brw_cache cache;
+   struct brw_cache cache;  /** non-surface items */
+   struct brw_cache surface_cache;  /* surface items */
    struct brw_cached_batch_item *cached_batch_items;
 
    struct {
@@ -555,11 +558,6 @@ struct brw_context
       GLuint vs_size;
       GLuint total_size;
 
-      /* Dynamic tracker which changes to reflect the state referenced
-       * by active fp and vp program parameters:
-       */
-      struct brw_tracked_state tracked_state;
-
       dri_bo *curbe_bo;
       /** Offset within curbe_bo of space for current curbe entry */
       GLuint curbe_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 9197fede2d8..a1a6c53d0e0 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -36,6 +36,7 @@
 #include "main/macros.h"
 #include "main/enums.h"
 #include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
 #include "shader/prog_statevars.h"
 #include "intel_batchbuffer.h"
 #include "intel_regions.h"
@@ -188,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw)
    GLfloat *buf;
    GLuint i;
 
-   /* Update our own dependency flags.  This works because this
-    * function will also be called whenever fp or vp changes.
-    */
-   brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
-   brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags;
-   brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags;
-
    if (sz == 0) {
       if (brw->curbe.last_buf) {
 	 free(brw->curbe.last_buf);
@@ -335,78 +329,11 @@ static void prepare_constant_buffer(struct brw_context *brw)
     */
 }
 
-
-/**
- * Copy Mesa program parameters into given constant buffer.
- */
-static void
-update_constant_buffer(struct brw_context *brw,
-                       const struct gl_program_parameter_list *params,
-                       dri_bo *const_buffer)
-{
-   struct intel_context *intel = &brw->intel;
-   const int size = params->NumParameters * 4 * sizeof(GLfloat);
-
-   /* copy Mesa program constants into the buffer */
-   if (const_buffer && size > 0) {
-
-      assert(const_buffer);
-      assert(const_buffer->size >= size);
-
-      if (intel->intelScreen->kernel_exec_fencing) {
-         drm_intel_gem_bo_map_gtt(const_buffer);
-         memcpy(const_buffer->virtual, params->ParameterValues, size);
-         drm_intel_gem_bo_unmap_gtt(const_buffer);
-      }
-      else {
-         dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
-      }
-
-      if (0) {
-         int i;
-         for (i = 0; i < params->NumParameters; i++) {
-            float *p = params->ParameterValues[i];
-            printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]);
-         }
-      }
-   }
-}
-
-
-/** Copy current vertex program's parameters into the constant buffer */
-static void
-update_vertex_constant_buffer(struct brw_context *brw)
-{
-   struct brw_vertex_program *vp =
-      (struct brw_vertex_program *) brw->vertex_program;
-   if (0) {
-      printf("update VS constants in buffer %p\n", vp->const_buffer);
-      printf("program %u\n", vp->program.Base.Id);
-   }
-   if (vp->use_const_buffer)
-      update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer);
-}
-
-
-/** Copy current fragment program's parameters into the constant buffer */
-static void
-update_fragment_constant_buffer(struct brw_context *brw)
-{
-   struct brw_fragment_program *fp =
-      (struct brw_fragment_program *) brw->fragment_program;
-   if (fp->use_const_buffer)
-      update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer);
-}
-
-
 static void emit_constant_buffer(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
    GLuint sz = brw->curbe.total_size;
 
-   update_vertex_constant_buffer(brw);
-   update_fragment_constant_buffer(brw);
-
    BEGIN_BATCH(2, IGNORE_CLIPRECTS);
    if (sz == 0) {
       OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
@@ -428,7 +355,7 @@ static void emit_constant_buffer(struct brw_context *brw)
  */
 const struct brw_tracked_state brw_constant_buffer = {
    .dirty = {
-      .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION),      /* plus fp and vp flags */
+      .mesa = _NEW_PROGRAM_CONSTANTS,
       .brw  = (BRW_NEW_FRAGMENT_PROGRAM |
 	       BRW_NEW_VERTEX_PROGRAM |
 	       BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 62c98bd8bb3..bc7756ceab4 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -97,7 +97,7 @@ struct brw_glsl_call;
 
 
 #define BRW_EU_MAX_INSN_STACK 5
-#define BRW_EU_MAX_INSN 4000
+#define BRW_EU_MAX_INSN 10000
 
 struct brw_compile {
    struct brw_instruction store[BRW_EU_MAX_INSN];
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 9bc5c35139c..4784254bc7d 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -118,7 +118,10 @@ static void upload_binding_table_pointers(struct brw_context *brw)
 
    BEGIN_BATCH(6, IGNORE_CLIPRECTS);
    OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
-   OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+   if (brw->vs.bind_bo != NULL)
+      OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+   else
+      OUT_BATCH(0);
    OUT_BATCH(0); /* gs */
    OUT_BATCH(0); /* clip */
    OUT_BATCH(0); /* sf */
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 81b0a45998f..bf9f6cae55e 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -72,11 +72,13 @@ const struct brw_tracked_state brw_sf_vp;
 const struct brw_tracked_state brw_state_base_address;
 const struct brw_tracked_state brw_urb_fence;
 const struct brw_tracked_state brw_vertex_state;
+const struct brw_tracked_state brw_vs_surfaces;
 const struct brw_tracked_state brw_vs_prog;
 const struct brw_tracked_state brw_vs_unit;
 const struct brw_tracked_state brw_wm_input_sizes;
 const struct brw_tracked_state brw_wm_prog;
 const struct brw_tracked_state brw_wm_samplers;
+const struct brw_tracked_state brw_wm_constant_surface;
 const struct brw_tracked_state brw_wm_surfaces;
 const struct brw_tracked_state brw_wm_unit;
 
@@ -91,6 +93,20 @@ const struct brw_tracked_state brw_drawing_rect;
 const struct brw_tracked_state brw_indices;
 const struct brw_tracked_state brw_vertices;
 
+/**
+ * Use same key for WM and VS surfaces.
+ */
+struct brw_surface_key {
+   GLenum target, depthmode;
+   dri_bo *bo;
+   GLint format, internal_format;
+   GLint first_level, last_level;
+   GLint width, height, depth;
+   GLint pitch, cpp;
+   uint32_t tiling;
+   GLuint offset;
+};
+
 /***********************************************************************
  * brw_state.c
  */
@@ -135,8 +151,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
 			  void *aux_return);
 void brw_state_cache_check_size( struct brw_context *brw );
 
-void brw_init_cache( struct brw_context *brw );
-void brw_destroy_cache( struct brw_context *brw );
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
 
 /***********************************************************************
  * brw_state_batch.c
@@ -150,4 +166,9 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 void brw_destroy_batch_cache( struct brw_context *brw );
 void brw_clear_batch_cache_flush( struct brw_context *brw );
 
+/* brw_wm_surface_state.c */
+dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+                             struct brw_surface_key *key );
+
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index d5b51664066..e40d7a04164 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -56,9 +56,9 @@
  * incorrect program is run for the other instance.
  */
 
+#include "main/imports.h"
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
-#include "main/imports.h"
 
 /* XXX: Fixme - have to include these to get the sizes of the prog_key
  * structs:
@@ -69,8 +69,10 @@
 #include "brw_sf.h"
 #include "brw_gs.h"
 
-static GLuint hash_key( const void *key, GLuint key_size,
-			dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+
+static GLuint
+hash_key(const void *key, GLuint key_size,
+         dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
 {
    GLuint *ikey = (GLuint *)key;
    GLuint hash = 0, i;
@@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size,
    return hash;
 }
 
+
 /**
  * Marks a new buffer as being chosen for the given cache id.
  */
@@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
    cache->brw->state.dirty.cache |= 1 << cache_id;
 }
 
+
 static struct brw_cache_item *
 search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
 	     GLuint hash, const void *key, GLuint key_size,
@@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
 }
 
 
-static void rehash( struct brw_cache *cache )
+static void
+rehash(struct brw_cache *cache)
 {
    struct brw_cache_item **items;
    struct brw_cache_item *c, *next;
@@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache )
    cache->size = size;
 }
 
+
 /**
  * Returns the buffer object matching cache_id and key, or NULL.
  */
-dri_bo *brw_search_cache( struct brw_cache *cache,
-			  enum brw_cache_id cache_id,
-			  const void *key,
-			  GLuint key_size,
-			  dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
-			  void *aux_return )
+dri_bo *
+brw_search_cache(struct brw_cache *cache,
+                 enum brw_cache_id cache_id,
+                 const void *key,
+                 GLuint key_size,
+                 dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
+                 void *aux_return)
 {
    struct brw_cache_item *item;
    GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
@@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
    return item->bo;
 }
 
+
 dri_bo *
 brw_upload_cache( struct brw_cache *cache,
 		  enum brw_cache_id cache_id,
@@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache,
    return bo;
 }
 
-/* This doesn't really work with aux data.  Use search/upload instead
+
+/**
+ * This doesn't really work with aux data.  Use search/upload instead
  */
 dri_bo *
 brw_cache_data_sz(struct brw_cache *cache,
@@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache,
    return bo;
 }
 
+
 /**
  * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
  *
@@ -319,21 +330,22 @@ enum pool_type {
    DW_GENERAL_STATE
 };
 
+
 static void
-brw_init_cache_id( struct brw_context *brw,
-		const char *name,
-		enum brw_cache_id id,
-		GLuint key_size,
-		GLuint aux_size)
+brw_init_cache_id(struct brw_cache *cache,
+                  const char *name,
+                  enum brw_cache_id id,
+                  GLuint key_size,
+                  GLuint aux_size)
 {
-   struct brw_cache *cache = &brw->cache;
-
    cache->name[id] = strdup(name);
    cache->key_size[id] = key_size;
    cache->aux_size[id] = aux_size;
 }
 
-void brw_init_cache( struct brw_context *brw )
+
+static void
+brw_init_non_surface_cache(struct brw_context *brw)
 {
    struct brw_cache *cache = &brw->cache;
 
@@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw )
    cache->size = 7;
    cache->n_items = 0;
    cache->items = (struct brw_cache_item **)
-      _mesa_calloc(cache->size * 
-		   sizeof(struct brw_cache_item));
+      _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "CC_VP",
 		     BRW_CC_VP,
 		     sizeof(struct brw_cc_viewport),
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "CC_UNIT",
 		     BRW_CC_UNIT,
 		     sizeof(struct brw_cc_unit_state),
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "WM_PROG",
 		     BRW_WM_PROG,
 		     sizeof(struct brw_wm_prog_key),
 		     sizeof(struct brw_wm_prog_data));
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "SAMPLER_DEFAULT_COLOR",
 		     BRW_SAMPLER_DEFAULT_COLOR,
 		     sizeof(struct brw_sampler_default_color),
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "SAMPLER",
 		     BRW_SAMPLER,
 		     0,		/* variable key/data size */
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "WM_UNIT",
 		     BRW_WM_UNIT,
 		     sizeof(struct brw_wm_unit_state),
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "SF_PROG",
 		     BRW_SF_PROG,
 		     sizeof(struct brw_sf_prog_key),
 		     sizeof(struct brw_sf_prog_data));
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "SF_VP",
 		     BRW_SF_VP,
 		     sizeof(struct brw_sf_viewport),
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "SF_UNIT",
 		     BRW_SF_UNIT,
 		     sizeof(struct brw_sf_unit_state),
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "VS_UNIT",
 		     BRW_VS_UNIT,
 		     sizeof(struct brw_vs_unit_state),
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "VS_PROG",
 		     BRW_VS_PROG,
 		     sizeof(struct brw_vs_prog_key),
 		     sizeof(struct brw_vs_prog_data));
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "CLIP_UNIT",
 		     BRW_CLIP_UNIT,
 		     sizeof(struct brw_clip_unit_state),
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "CLIP_PROG",
 		     BRW_CLIP_PROG,
 		     sizeof(struct brw_clip_prog_key),
 		     sizeof(struct brw_clip_prog_data));
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "GS_UNIT",
 		     BRW_GS_UNIT,
 		     sizeof(struct brw_gs_unit_state),
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "GS_PROG",
 		     BRW_GS_PROG,
 		     sizeof(struct brw_gs_prog_key),
 		     sizeof(struct brw_gs_prog_data));
+}
+
+
+static void
+brw_init_surface_cache(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->surface_cache;
+
+   cache->brw = brw;
 
-   brw_init_cache_id(brw,
+   cache->size = 7;
+   cache->n_items = 0;
+   cache->items = (struct brw_cache_item **)
+      _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
+
+   brw_init_cache_id(cache,
 		     "SS_SURFACE",
 		     BRW_SS_SURFACE,
 		     sizeof(struct brw_surface_state),
 		     0);
 
-   brw_init_cache_id(brw,
+   brw_init_cache_id(cache,
 		     "SS_SURF_BIND",
 		     BRW_SS_SURF_BIND,
 		     0,
 		     0);
 }
 
+
+void
+brw_init_caches(struct brw_context *brw)
+{
+   brw_init_non_surface_cache(brw);
+   brw_init_surface_cache(brw);
+}
+
+
 static void
-brw_clear_cache( struct brw_context *brw )
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
 {
    struct brw_cache_item *c, *next;
    GLuint i;
@@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw )
    if (INTEL_DEBUG & DEBUG_STATE)
       _mesa_printf("%s\n", __FUNCTION__);
 
-   for (i = 0; i < brw->cache.size; i++) {
-      for (c = brw->cache.items[i]; c; c = next) {
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
 	 int j;
 
 	 next = c->next;
@@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw )
 	 free((void *)c->key);
 	 free(c);
       }
-      brw->cache.items[i] = NULL;
+      cache->items[i] = NULL;
    }
 
-   brw->cache.n_items = 0;
+   cache->n_items = 0;
 
    if (brw->curbe.last_buf) {
       _mesa_free(brw->curbe.last_buf);
@@ -483,25 +517,46 @@ brw_clear_cache( struct brw_context *brw )
    brw->state.dirty.cache |= ~0;
 }
 
-void brw_state_cache_check_size( struct brw_context *brw )
+
+void
+brw_state_cache_check_size(struct brw_context *brw)
 {
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+
    /* un-tuned guess.  We've got around 20 state objects for a total of around
     * 32k, so 1000 of them is around 1.5MB.
     */
    if (brw->cache.n_items > 1000)
-      brw_clear_cache(brw);
+      brw_clear_cache(brw, &brw->cache);
+
+   if (brw->surface_cache.n_items > 1000)
+      brw_clear_cache(brw, &brw->surface_cache);
 }
 
-void brw_destroy_cache( struct brw_context *brw )
+
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
 {
    GLuint i;
 
-   brw_clear_cache(brw);
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   brw_clear_cache(brw, cache);
    for (i = 0; i < BRW_MAX_CACHE; i++) {
-      dri_bo_unreference(brw->cache.last_bo[i]);
-      free(brw->cache.name[i]);
+      dri_bo_unreference(cache->last_bo[i]);
+      free(cache->name[i]);
    }
-   free(brw->cache.items);
-   brw->cache.items = NULL;
-   brw->cache.size = 0;
+   free(cache->items);
+   cache->items = NULL;
+   cache->size = 0;
+}
+
+
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+   brw_destroy_cache(brw, &brw->cache);
+   brw_destroy_cache(brw, &brw->surface_cache);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 5de1450e612..c6dfea4743c 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -59,11 +59,12 @@ const struct brw_tracked_state *atoms[] =
    &brw_curbe_offsets,
    &brw_recalculate_urb_fence,
 
-
    &brw_cc_vp,
    &brw_cc_unit,
 
-   &brw_wm_surfaces,		/* must do before samplers */
+   &brw_vs_surfaces,		/* must do before unit */
+   &brw_wm_constant_surface,	/* must do before wm surfaces/bind bo */
+   &brw_wm_surfaces,		/* must do before samplers and unit */
    &brw_wm_samplers,
 
    &brw_wm_unit,
@@ -88,54 +89,26 @@ const struct brw_tracked_state *atoms[] =
 
    &brw_line_stipple,
    &brw_aa_line_parameters,
-   /* Ordering of the commands below is documented as fixed.  
-    */
-#if 0
-   &brw_pipelined_state_pointers,
-   &brw_urb_fence,
-   &brw_constant_buffer_state,
-#else
+
    &brw_psp_urb_cbs,
-#endif
 
    &brw_drawing_rect,
    &brw_indices,
    &brw_vertices,
 
-   NULL,			/* brw_constant_buffer */
+   &brw_constant_buffer
 };
 
 
 void brw_init_state( struct brw_context *brw )
 {
-   GLuint i;
-
-   brw_init_cache(brw);
-
-   brw->state.atoms = _mesa_malloc(sizeof(atoms));
-   brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms);
-   _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms));
-
-   /* Patch in a pointer to the dynamic state atom:
-    */
-   for (i = 0; i < brw->state.nr_atoms; i++)
-      if (brw->state.atoms[i] == NULL)
-	 brw->state.atoms[i] = &brw->curbe.tracked_state;
-
-   _mesa_memcpy(&brw->curbe.tracked_state, 
-		&brw_constant_buffer,
-		sizeof(brw_constant_buffer));
+   brw_init_caches(brw);
 }
 
 
 void brw_destroy_state( struct brw_context *brw )
 {
-   if (brw->state.atoms) {
-      _mesa_free(brw->state.atoms);
-      brw->state.atoms = NULL;
-   }
-
-   brw_destroy_cache(brw);
+   brw_destroy_caches(brw);
    brw_destroy_batch_cache(brw);
 }
 
@@ -218,6 +191,7 @@ static struct dirty_bit_map mesa_bits[] = {
    DEFINE_BIT(_NEW_MULTISAMPLE),
    DEFINE_BIT(_NEW_TRACK_MATRIX),
    DEFINE_BIT(_NEW_PROGRAM),
+   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
    {0, 0, 0}
 };
 
@@ -336,7 +310,7 @@ void brw_validate_state( struct brw_context *brw )
 
    /* do prepare stage for all atoms */
    for (i = 0; i < Elements(atoms); i++) {
-      const struct brw_tracked_state *atom = brw->state.atoms[i];
+      const struct brw_tracked_state *atom = atoms[i];
 
       if (brw->intel.Fallback)
          break;
@@ -367,8 +341,8 @@ void brw_upload_state(struct brw_context *brw)
       _mesa_memset(&examined, 0, sizeof(examined));
       prev = *state;
 
-      for (i = 0; i < brw->state.nr_atoms; i++) {	 
-	 const struct brw_tracked_state *atom = brw->state.atoms[i];
+      for (i = 0; i < Elements(atoms); i++) {	 
+	 const struct brw_tracked_state *atom = atoms[i];
 	 struct brw_state_flags generated;
 
 	 assert(atom->dirty.mesa ||
@@ -397,7 +371,7 @@ void brw_upload_state(struct brw_context *brw)
    }
    else {
       for (i = 0; i < Elements(atoms); i++) {	 
-	 const struct brw_tracked_state *atom = brw->state.atoms[i];
+	 const struct brw_tracked_state *atom = atoms[i];
 
 	 if (brw->intel.Fallback)
 	    break;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index b69616d6e52..d7f75e3685e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -69,13 +69,18 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 {
    GLuint i, reg = 0, mrf;
 
-#if 0
-   if (c->vp->program.Base.Parameters->NumParameters >= 6)
-      c->vp->use_const_buffer = 1;
+   /* Determine whether to use a real constant buffer or use a block
+    * of GRF registers for constants.  The latter is faster but only
+    * works if everything fits in the GRF.
+    * XXX this heuristic/check may need some fine tuning...
+    */
+   if (c->vp->program.Base.Parameters->NumParameters +
+       c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+      c->vp->use_const_buffer = GL_TRUE;
    else
-#endif
       c->vp->use_const_buffer = GL_FALSE;
-   /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
+
+   /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
 
    /* r0 -- reserved as usual
     */
@@ -709,10 +714,11 @@ get_constant(struct brw_vs_compile *c,
    struct brw_compile *p = &c->func;
    struct brw_reg const_reg;
    struct brw_reg const2_reg;
+   const GLboolean relAddr = src->RelAddr;
 
    assert(argIndex < 3);
 
-   if (c->current_const[argIndex].index != src->Index || src->RelAddr) {
+   if (c->current_const[argIndex].index != src->Index || relAddr) {
       struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
 
       c->current_const[argIndex].index = src->Index;
@@ -725,13 +731,13 @@ get_constant(struct brw_vs_compile *c,
       brw_dp_READ_4_vs(p,
                        c->current_const[argIndex].reg,/* writeback dest */
                        0,                             /* oword */
-                       src->RelAddr,                  /* relative indexing? */
+                       relAddr,                       /* relative indexing? */
                        addrReg,                       /* address register */
                        16 * src->Index,               /* byte offset */
                        SURF_INDEX_VERT_CONST_BUFFER   /* binding table index */
                        );
 
-      if (src->RelAddr) {
+      if (relAddr) {
          /* second read */
          const2_reg = get_tmp(c);
 
@@ -742,7 +748,7 @@ get_constant(struct brw_vs_compile *c,
          brw_dp_READ_4_vs(p,
                           const2_reg,              /* writeback dest */
                           1,                       /* oword */
-                          src->RelAddr,            /* relative indexing? */
+                          relAddr,                 /* relative indexing? */
                           addrReg,                 /* address register */
                           16 * src->Index,         /* byte offset */
                           SURF_INDEX_VERT_CONST_BUFFER
@@ -752,7 +758,7 @@ get_constant(struct brw_vs_compile *c,
 
    const_reg = c->current_const[argIndex].reg;
 
-   if (src->RelAddr) {
+   if (relAddr) {
       /* merge the two Owords into the constant register */
       /* const_reg[7..4] = const2_reg[7..4] */
       brw_MOV(p,
@@ -1219,7 +1225,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
 
    for (insn = 0; insn < nr_insns; insn++) {
 
-      struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+      const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
       struct brw_reg args[3], dst;
       GLuint i;
       
@@ -1232,7 +1238,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
        */
       if (inst->Opcode != OPCODE_SWZ)
 	  for (i = 0; i < 3; i++) {
-	      struct prog_src_register *src = &inst->SrcReg[i];
+	      const struct prog_src_register *src = &inst->SrcReg[i];
 	      index = src->Index;
 	      file = src->File;	
 	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
new file mode 100644
index 00000000000..89f47522a1c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -0,0 +1,226 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <[email protected]>
+  */
+
+#include "main/mtypes.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "shader/prog_parameter.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/**
+ * Creates a new VS constant buffer reflecting the current VS program's
+ * constants, if needed by the VS program.  When the program does not use
+ * a constant buffer this returns NULL, and constants instead go through
+ * the CURBEs using the brw_constant_buffer state atom.
+ */
+static drm_intel_bo *
+brw_vs_update_constant_buffer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_vertex_program *vp =
+      (struct brw_vertex_program *) brw->vertex_program;
+   const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+   const int size = params->NumParameters * 4 * sizeof(GLfloat);
+   drm_intel_bo *const_buffer;
+
+   /* BRW_NEW_VERTEX_PROGRAM */
+   if (!vp->use_const_buffer)
+      return NULL;
+
+   const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+				     size, 64);
+
+   /* _NEW_PROGRAM_CONSTANTS */
+   dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+   return const_buffer;
+}
+
+/**
+ * Update the surface state for a VS constant buffer.
+ *
+ * Sets brw->vs.surf_bo[surf] and the current vertex program's const_buffer.
+ */
+static void
+brw_update_vs_constant_surface( GLcontext *ctx,
+                                GLuint surf)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_surface_key key;
+   struct brw_vertex_program *vp =
+      (struct brw_vertex_program *) brw->vertex_program;
+   const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+
+   assert(surf == 0);
+
+   /* If we're in this state update atom, we need to update VS constants, so
+    * free the old buffer and create a new one for the new contents.
+    */
+   dri_bo_unreference(vp->const_buffer);
+   vp->const_buffer = brw_vs_update_constant_buffer(brw);
+
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it.
+    */
+   if (vp->const_buffer == 0) {
+      drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+      brw->vs.surf_bo[surf] = NULL;
+      return;
+   }
+
+   memset(&key, 0, sizeof(key));
+
+   key.format = MESA_FORMAT_RGBA_FLOAT32;
+   key.internal_format = GL_RGBA;
+   key.bo = vp->const_buffer;
+   key.depthmode = GL_NONE;
+   key.pitch = params->NumParameters;
+   key.width = params->NumParameters;
+   key.height = 1;
+   key.depth = 1;
+   key.cpp = 16;
+
+   /*
+   printf("%s:\n", __FUNCTION__);
+   printf("  width %d  height %d  depth %d  cpp %d  pitch %d\n",
+          key.width, key.height, key.depth, key.cpp, key.pitch);
+   */
+
+   drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+   brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+                                            BRW_SS_SURFACE,
+                                            &key, sizeof(key),
+                                            &key.bo, key.bo ? 1 : 0,
+                                            NULL);
+   if (brw->vs.surf_bo[surf] == NULL) {
+      brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+   }
+}
+
+
+/**
+ * Constructs the binding table for the VS surface state.
+ *
+ * Searches the surface cache with a NULL key, so only the brw->vs.surf_bo
+ * pointers identify the table; uploads a new table on a miss.
+ */
+static dri_bo *
+brw_vs_get_binding_table(struct brw_context *brw)
+{
+   dri_bo *bind_bo;
+
+   bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+			      NULL, 0,
+			      brw->vs.surf_bo, BRW_VS_MAX_SURF,
+			      NULL);
+
+   if (bind_bo == NULL) {
+      /* Fixed-size table, so build it on the stack: there is no
+       * allocation that could fail and nothing to free afterwards.
+       */
+      uint32_t data[BRW_VS_MAX_SURF];
+      int i;
+
+      for (i = 0; i < BRW_VS_MAX_SURF; i++)
+         data[i] = brw->vs.surf_bo[i] ? brw->vs.surf_bo[i]->offset : 0;
+
+      bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+				  NULL, 0,
+				  brw->vs.surf_bo, BRW_VS_MAX_SURF,
+				  data, sizeof(data),
+				  NULL, NULL);
+
+      /* Emit binding table relocations to surface state */
+      for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+	 if (brw->vs.surf_bo[i] != NULL) {
+	    /* The presumed offsets were set in the data values for
+	     * brw_upload_cache.
+	     */
+	    drm_intel_bo_emit_reloc(bind_bo, i * 4,
+				    brw->vs.surf_bo[i], 0,
+				    I915_GEM_DOMAIN_INSTRUCTION, 0);
+	 }
+      }
+   }
+
+   return bind_bo;
+}
+
+/**
+ * Vertex shader surfaces (constant buffer).
+ *
+ * This consumes the state updates for the constant buffer needing
+ * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
+ * CACHE_NEW_SURF_BIND for the binding table upload.
+ */
+static void prepare_vs_surfaces(struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   int i;
+   int nr_surfaces = 0;
+
+   brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
+
+   for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+      if (brw->vs.surf_bo[i] != NULL) {
+	 nr_surfaces = i + 1;
+      }
+   }
+
+   if (brw->vs.nr_surfaces != nr_surfaces) {
+      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+      brw->vs.nr_surfaces = nr_surfaces;
+   }
+
+   /* Note that we don't end up updating the bind_bo if we don't have a
+    * surface to be pointing at.  This should be relatively harmless, as it
+    * just slightly increases our working set size.
+    */
+   if (brw->vs.nr_surfaces != 0) {
+      dri_bo_unreference(brw->vs.bind_bo);
+      brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+   }
+}
+
+const struct brw_tracked_state brw_vs_surfaces = {
+   .dirty = {
+      .mesa = (_NEW_PROGRAM_CONSTANTS),
+      .brw = (BRW_NEW_VERTEX_PROGRAM),
+      .cache = 0
+   },
+   .prepare = prepare_vs_surfaces,
+};
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 90d74c2885c..cd65f57bbc9 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -146,6 +146,13 @@ static void do_wm_prog( struct brw_context *brw,
    if (c == NULL) {
       brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
       c = brw->wm.compile_data;
+      if (c == NULL) {
+         /* Ouch - big out of memory problem.  Can't continue
+          * without triggering a segfault, no way to signal,
+          * so just return.
+          */
+         return;
+      }
    } else {
       memset(c, 0, sizeof(*brw->wm.compile_data));
    }
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index f0d31fc1ddc..59ead757b51 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -240,15 +240,20 @@ struct brw_wm_compile {
    GLuint max_wm_grf;
    GLuint last_scratch;
 
+   GLuint cur_inst;  /**< index of current instruction */
+
+   GLboolean out_of_regs;  /**< ran out of GRF registers? */
+
    /** Mapping from Mesa registers to hardware registers */
    struct {
       GLboolean inited;
       struct brw_reg reg;
    } wm_regs[PROGRAM_PAYLOAD+1][256][4];
 
+   GLboolean used_grf[BRW_WM_MAX_GRF];
+   GLuint first_free_grf;
    struct brw_reg stack;
    struct brw_reg emit_mask_reg;
-   GLuint reg_index;  /**< Index of next free GRF register */
    GLuint tmp_regs[BRW_WM_MAX_GRF];
    GLuint tmp_index;
    GLuint tmp_max;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 117460842a3..23caf59435f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -1,5 +1,7 @@
 #include "main/macros.h"
 #include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_optimize.h"
 #include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_wm.h"
@@ -42,6 +44,83 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
 }
 
 
+
+static void
+reclaim_temps(struct brw_wm_compile *c);
+
+
+/** Mark GRF register \p r as used so that alloc_grf() will skip over it. */
+static void
+prealloc_grf(struct brw_wm_compile *c, int r)
+{
+   c->used_grf[r] = GL_TRUE; /* r is not range-checked; caller passes a valid index */
+}
+
+
+/** Mark GRF register \p r as free and pull the alloc_grf() search hint back. */
+static void
+release_grf(struct brw_wm_compile *c, int r)
+{
+   assert(r >= 0 && r < BRW_WM_MAX_GRF);  /* r indexes used_grf[] */
+   c->used_grf[r] = GL_FALSE;
+   c->first_free_grf = MIN2(c->first_free_grf, r);  /* hint never skips a free reg */
+}
+
+
+/** Return the index of a free GRF and mark it used, or -1 if none is left. */
+static int
+alloc_grf(struct brw_wm_compile *c)
+{
+   GLuint r;
+   for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+      if (!c->used_grf[r]) {
+         c->used_grf[r] = GL_TRUE;
+         c->first_free_grf = r + 1;  /* hint only: next search starts here */
+         return r;
+      }
+   }
+
+   /* nothing free at or above the hint; release GRFs of dead program temps */
+   reclaim_temps(c);
+   c->first_free_grf = 0;  /* rescan the whole register file */
+
+   /* try alloc again */
+   for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+      if (!c->used_grf[r]) {
+         c->used_grf[r] = GL_TRUE;
+         c->first_free_grf = r + 1;  /* hint only: next search starts here */
+         return r;
+      }
+   }
+
+   for (r = 0; r < BRW_WM_MAX_GRF; r++) {
+      assert(c->used_grf[r]);  /* sanity check: the rescan missed nothing */
+   }
+
+   /* really, no free GRF regs found */
+   if (!c->out_of_regs) {
+      /* print warning once per compilation */
+      _mesa_warning(NULL, "i965: ran out of registers for fragment program");
+      c->out_of_regs = GL_TRUE;
+   }
+
+   return -1;  /* no register was allocated; callers must check for this */
+}
+
+
+/** Return one past the highest GRF index in use, or 0 if none is used. */
+static int
+num_grf_used(const struct brw_wm_compile *c)
+{
+   int count = BRW_WM_MAX_GRF;
+   /* drop unused registers off the top of the register file */
+   while (count > 0 && !c->used_grf[count - 1])
+      count--;
+   return count;
+}
+
+
+
 /**
  * Record the mapping of a Mesa register to a hardware register.
  */
@@ -68,11 +147,23 @@ static int get_scalar_dst_index(const struct prog_instruction *inst)
 static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
 {
     struct brw_reg reg;
-    if(c->tmp_index == c->tmp_max)
-	c->tmp_regs[ c->tmp_max++ ] = c->reg_index++;
-    
+
+    /* if we need to allocate another temp, grow the tmp_regs[] array */
+    if (c->tmp_index == c->tmp_max) {
+       int r = alloc_grf(c);
+       if (r < 0) {
+          /*printf("Out of temps in %s\n", __FUNCTION__);*/
+          r = 50; /* XXX random register! */
+       }
+       c->tmp_regs[ c->tmp_max++ ] = r;
+    }
+
+    /* form the GRF register */
     reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
+    /*printf("alloc_temp %d\n", reg.nr);*/
+    assert(reg.nr < BRW_WM_MAX_GRF);
     return reg;
+
 }
 
 /**
@@ -130,35 +221,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
 	    return brw_null_reg();
     }
 
+    assert(index < 256);
+    assert(component < 4);
+
     /* see if we've already allocated a HW register for this Mesa register */
     if (c->wm_regs[file][index][component].inited) {
-	/* yes, re-use */
-	reg = c->wm_regs[file][index][component].reg;
+       /* yes, re-use */
+       reg = c->wm_regs[file][index][component].reg;
     }
     else {
 	/* no, allocate new register */
-	reg = brw_vec8_grf(c->reg_index, 0);
-    }
+       int grf = alloc_grf(c);
+       /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
+       if (grf < 0) {
+          /* totally out of temps */
+          grf = 51; /* XXX random register! */
+       }
 
-    /* if this is a new register allocation, record it in the table */
-    if (!c->wm_regs[file][index][component].inited) {
-	set_reg(c, file, index, component, reg);
-	c->reg_index++;
-    }
+       reg = brw_vec8_grf(grf, 0);
+       /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
 
-    if (c->reg_index >= BRW_WM_MAX_GRF - 12) {
-	/* ran out of temporary registers! */
-#if 1
-        /* This is a big hack for now.
-         * Return bad register index, just don't hang the GPU.
-         */
-        _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index);
-        c->reg_index = BRW_WM_MAX_GRF - 13;
-#else
-	return brw_null_reg();
-#endif
+       set_reg(c, file, index, component, reg);
     }
- 
+
     if (neg & (1 << component)) {
 	reg = negate(reg);
     }
@@ -168,6 +253,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
 }
 
 
+
+/**
+ * This is called if we run out of GRF registers.  Examine the live intervals
+ * of temp regs in the program and free those which won't be used again.
+ */
+static void
+reclaim_temps(struct brw_wm_compile *c)
+{
+   GLint intBegin[MAX_PROGRAM_TEMPS];
+   GLint intEnd[MAX_PROGRAM_TEMPS];
+   int index;
+
+   /* NOTE(review): a failure of the call below would go unnoticed -- confirm intEnd[] is always filled */
+
+   _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
+                             intBegin, intEnd);
+
+   for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
+      if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
+         /* temp[index]'s interval ended earlier (-1 presumably means unused) */
+         int component;
+         /* each of the 4 components may have been given its own GRF */
+         for (component = 0; component < 4; component++) {
+            if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
+               int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
+               release_grf(c, r);
+               /*
+                * Forget the mapping so that get_reg() allocates a fresh GRF
+                * if this temp is ever referenced again.
+                */
+               c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
+            }
+         }
+      }
+   }
+}
+
+
+
+
 /**
  * Preallocate registers.  This sets up the Mesa to hardware register
  * mapping for certain registers, such as constants (uniforms/state vars)
@@ -179,6 +304,10 @@ static void prealloc_reg(struct brw_wm_compile *c)
     struct brw_reg reg;
     int nr_interp_regs = 0;
     GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+    GLuint reg_index = 0;
+
+    memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
+    c->first_free_grf = 0;
 
     for (i = 0; i < 4; i++) {
         if (i < c->key.nr_depth_regs) 
@@ -187,14 +316,20 @@ static void prealloc_reg(struct brw_wm_compile *c)
             reg = brw_vec8_grf(0, 0);
 	set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
     }
-    c->reg_index += 2 * c->key.nr_depth_regs;
+    reg_index += 2 * c->key.nr_depth_regs;
 
     /* constants */
     {
-        const int nr_params = c->fp->program.Base.Parameters->NumParameters;
+        const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
+        const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
 
         /* use a real constant buffer, or just use a section of the GRF? */
-        c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/
+        /* XXX this heuristic may need adjustment... */
+        if ((nr_params + nr_temps) * 4 + reg_index > 80)
+           c->fp->use_const_buffer = GL_TRUE;
+        else
+           c->fp->use_const_buffer = GL_FALSE;
+        /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
 
         if (c->fp->use_const_buffer) {
            /* We'll use a real constant buffer and fetch constants from
@@ -216,7 +351,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
            for (i = 0; i < nr_params; i++) {
               /* loop over XYZW channels */
               for (j = 0; j < 4; j++, index++) {
-                 reg = brw_vec1_grf(c->reg_index + index / 8, index % 8);
+                 reg = brw_vec1_grf(reg_index + index / 8, index % 8);
                  /* Save pointer to parameter/constant value.
                   * Constants will be copied in prepare_constant_buffer()
                   */
@@ -226,7 +361,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
            }
            /* number of constant regs used (each reg is float[8]) */
            c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
-           c->reg_index += c->nr_creg;
+           reg_index += c->nr_creg;
         }
     }
 
@@ -234,20 +369,28 @@ static void prealloc_reg(struct brw_wm_compile *c)
     for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
 	if (inputs & (1<<i)) {
 	    nr_interp_regs++;
-	    reg = brw_vec8_grf(c->reg_index, 0);
+	    reg = brw_vec8_grf(reg_index, 0);
 	    for (j = 0; j < 4; j++)
 		set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
-	    c->reg_index += 2;
+	    reg_index += 2;
 	}
     }
 
     c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
     c->prog_data.urb_read_length = nr_interp_regs * 2;
     c->prog_data.curb_read_length = c->nr_creg;
-    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
-    c->reg_index++;
-    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
-    c->reg_index += 2;
+    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+    reg_index++;
+    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+    reg_index += 2;
+
+    /* mark GRF regs [0..reg_index-1] as in-use */
+    for (i = 0; i < reg_index; i++)
+       prealloc_grf(c, i);
+
+    /* Don't use GRF 126, 127.  Using them seems to lead to GPU lock-ups */
+    prealloc_grf(c, 126);
+    prealloc_grf(c, 127);
 
     /* An instruction may reference up to three constants.
      * They'll be found in these registers.
@@ -256,12 +399,12 @@ static void prealloc_reg(struct brw_wm_compile *c)
     if (c->fp->use_const_buffer) {
        for (i = 0; i < 3; i++) {
           c->current_const[i].index = -1;
-          c->current_const[i].reg = alloc_tmp(c);
+          c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
        }
     }
 #if 0
     printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
-    printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index);
+    printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
 #endif
 }
 
@@ -283,23 +426,21 @@ static void fetch_constants(struct brw_wm_compile *c,
       if (src->File == PROGRAM_STATE_VAR ||
           src->File == PROGRAM_CONSTANT ||
           src->File == PROGRAM_UNIFORM) {
-         if (c->current_const[i].index != src->Index) {
-            c->current_const[i].index = src->Index;
+	 c->current_const[i].index = src->Index;
 
 #if 0
-            printf("  fetch const[%d] for arg %d into reg %d\n",
-                   src->Index, i, c->current_const[i].reg.nr);
+	 printf("  fetch const[%d] for arg %d into reg %d\n",
+		src->Index, i, c->current_const[i].reg.nr);
 #endif
 
-            /* need to fetch the constant now */
-            brw_dp_READ_4(p,
-                          c->current_const[i].reg,  /* writeback dest */
-                          1,                        /* msg_reg */
-                          src->RelAddr,             /* relative indexing? */
-                          16 * src->Index,          /* byte offset */
-                          SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
-                          );
-         }
+	 /* need to fetch the constant now */
+	 brw_dp_READ_4(p,
+		       c->current_const[i].reg,  /* writeback dest */
+		       1,                        /* msg_reg */
+		       src->RelAddr,             /* relative indexing? */
+		       16 * src->Index,          /* byte offset */
+		       SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
+		       );
       }
    }
 }
@@ -368,6 +509,14 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
     const GLuint nr = 1;
     const GLuint component = GET_SWZ(src->Swizzle, channel);
 
+    /* Extended swizzle terms */
+    if (component == SWIZZLE_ZERO) {
+       return brw_imm_f(0.0F);
+    }
+    else if (component == SWIZZLE_ONE) {
+       return brw_imm_f(1.0F);
+    }
+
     if (c->fp->use_const_buffer &&
         (src->File == PROGRAM_STATE_VAR ||
          src->File == PROGRAM_CONSTANT ||
@@ -665,27 +814,26 @@ static void emit_fb_write(struct brw_wm_compile *c,
     }
 
     if (c->key.dest_depth_reg) {
-        GLuint comp = c->key.dest_depth_reg / 2;
-        GLuint off = c->key.dest_depth_reg % 2;
+        const GLuint comp = c->key.dest_depth_reg / 2;
+        const GLuint off = c->key.dest_depth_reg % 2;
 
-        assert(comp == 1);
-        assert(off == 0);
-#if 0
-        /* XXX do we need this code?   comp always 1, off always 0, it seems */
         if (off != 0) {
+            /* XXX this code needs review/testing */
+            struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
+            struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
+
             brw_push_insn_state(p);
             brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
-            brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+            brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
             /* 2nd half? */
-            brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+            brw_MOV(p, brw_message_reg(nr+1), arg1_1);
             brw_pop_insn_state(p);
         }
         else
-#endif
         {
-           struct brw_reg src =  get_src_reg(c, inst, 1, 1);
-           brw_MOV(p, brw_message_reg(nr), src);
+            struct brw_reg src =  get_src_reg(c, inst, 1, 1);
+            brw_MOV(p, brw_message_reg(nr), src);
         }
         nr += 2;
    }
@@ -2595,7 +2743,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
     struct brw_compile *p = &c->func;
     struct brw_indirect stack_index = brw_indirect(0, 0);
 
-    c->reg_index = 0;
+    c->out_of_regs = GL_FALSE;
+
     prealloc_reg(c);
     brw_set_compression_control(p, BRW_COMPRESSION_NONE);
     brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
@@ -2603,6 +2752,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
     for (i = 0; i < c->nr_fp_insns; i++) {
         const struct prog_instruction *inst = &c->prog_instructions[i];
 
+        c->cur_inst = i;
+
 #if 0
         _mesa_printf("Inst %d: ", i);
         _mesa_print_instruction(inst);
@@ -2833,17 +2984,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 		_mesa_printf("unsupported IR in fragment shader %d\n",
 			inst->Opcode);
 	}
+
 	if (inst->CondUpdate)
 	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
 	else
 	    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
     }
     post_wm_emit(c);
-
-    if (c->reg_index >= BRW_WM_MAX_GRF) {
-        _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()");
-        /* XXX we need to do some proper error recovery here */
-    }
 }
 
 
@@ -2867,6 +3014,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
         brw_wm_print_program(c, "brw_wm_glsl_emit done");
     }
 
-    c->prog_data.total_grf = c->reg_index;
+    c->prog_data.total_grf = num_grf_used(c);
     c->prog_data.total_scratch = 0;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c
index bd60ac9b315..8fd067abe7d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_iz.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c
@@ -116,6 +116,10 @@ const struct {
  { C, 0, 1, 1, 1 } 
 };
 
+/**
+ * \param line_aa  AA_NEVER, AA_ALWAYS or AA_SOMETIMES
+ * \param lookup  bitmask of IZ_* flags
+ */
 void brw_wm_lookup_iz( GLuint line_aa,
 		       GLuint lookup,
 		       struct brw_wm_prog_key *key )
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 805df8a4af4..c49a5f6b4ec 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -176,22 +176,6 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
    }
 }
 
-
-/**
- * Use same key for WM and VS surfaces.
- */
-struct brw_surface_key {
-   GLenum target, depthmode;
-   dri_bo *bo;
-   GLint format, internal_format;
-   GLint first_level, last_level;
-   GLint width, height, depth;
-   GLint pitch, cpp;
-   uint32_t tiling;
-   GLuint offset;
-};
-
-
 static void
 brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
 {
@@ -268,7 +252,7 @@ brw_create_texture_surface( struct brw_context *brw,
       surf.ss0.cube_neg_z = 1;
    }
 
-   bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+   bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
 			 key, sizeof(*key),
 			 &key->bo, key->bo ? 1 : 0,
 			 &surf, sizeof(surf),
@@ -321,10 +305,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
    key.tiling = intelObj->mt->region->tiling;
 
    dri_bo_unreference(brw->wm.surf_bo[surf]);
-   brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
-                                         &key, sizeof(key),
-                                         &key.bo, key.bo ? 1 : 0,
-                                         NULL);
+   brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+                                            BRW_SS_SURFACE,
+                                            &key, sizeof(key),
+                                            &key.bo, key.bo ? 1 : 0,
+                                            NULL);
    if (brw->wm.surf_bo[surf] == NULL) {
       brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
    }
@@ -336,7 +321,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
  * Create the constant buffer surface.  Vertex/fragment shader constants will be
  * read from this buffer with Data Port Read instructions/messages.
  */
-static dri_bo *
+dri_bo *
 brw_create_constant_surface( struct brw_context *brw,
                              struct brw_surface_key *key )
 {
@@ -362,7 +347,7 @@ brw_create_constant_surface( struct brw_context *brw,
    surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
    brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
  
-   bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+   bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
 			 key, sizeof(*key),
 			 &key->bo, key->bo ? 1 : 0,
 			 &surf, sizeof(surf),
@@ -380,39 +365,70 @@ brw_create_constant_surface( struct brw_context *brw,
    return bo;
 }
 
+/* Creates a new WM constant buffer holding the current fragment program's
+ * parameter values, if the program uses a constant buffer.  Otherwise
+ * constants go through the CURBEs using the brw_constant_buffer state atom.
+ *
+ * Returns the new BO, or NULL if none is needed or the allocation failed.
+ */
+static drm_intel_bo *
+brw_wm_update_constant_buffer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
+   const int size = params->NumParameters * 4 * sizeof(GLfloat);
+   drm_intel_bo *const_buffer;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (!fp->use_const_buffer)
+      return NULL;
+
+   const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer",
+				     size, 64);
+   if (const_buffer == NULL)
+      return NULL;  /* out of memory: callers treat NULL as "no buffer" */
+   /* _NEW_PROGRAM_CONSTANTS */
+   dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+   return const_buffer;
+}
 
 /**
  * Update the surface state for a WM constant buffer.
  * The constant buffer will be (re)allocated here if needed.
  */
-static dri_bo *
+static void
 brw_update_wm_constant_surface( GLcontext *ctx,
-                                GLuint surf,
-                                dri_bo *const_buffer,
-                                const struct gl_program_parameter_list *params)
+                                GLuint surf)
 {
    struct brw_context *brw = brw_context(ctx);
    struct brw_surface_key key;
-   struct intel_context *intel = &brw->intel;
-   const int size = params->NumParameters * 4 * sizeof(GLfloat);
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   const struct gl_program_parameter_list *params =
+      fp->program.Base.Parameters;
 
-   /* free old const buffer if too small */
-   if (const_buffer && const_buffer->size < size) {
-      dri_bo_unreference(const_buffer);
-      const_buffer = NULL;
-   }
+   /* If we're in this state update atom, we need to update WM constants, so
+    * free the old buffer and create a new one for the new contents.
+    */
+   dri_bo_unreference(fp->const_buffer);
+   fp->const_buffer = brw_wm_update_constant_buffer(brw);
 
-   /* alloc new buffer if needed */
-   if (!const_buffer) {
-      const_buffer =
-         drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64);
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it.
+    */
+   if (fp->const_buffer == 0) {
+      drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+      brw->wm.surf_bo[surf] = NULL;
+      return;
    }
 
    memset(&key, 0, sizeof(key));
 
    key.format = MESA_FORMAT_RGBA_FLOAT32;
    key.internal_format = GL_RGBA;
-   key.bo = const_buffer;
+   key.bo = fp->const_buffer;
    key.depthmode = GL_NONE;
    key.pitch = params->NumParameters;
    key.width = params->NumParameters;
@@ -427,77 +443,59 @@ brw_update_wm_constant_surface( GLcontext *ctx,
    */
 
    dri_bo_unreference(brw->wm.surf_bo[surf]);
-   brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+   brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+                                            BRW_SS_SURFACE,
                                             &key, sizeof(key),
                                             &key.bo, key.bo ? 1 : 0,
                                             NULL);
    if (brw->wm.surf_bo[surf] == NULL) {
       brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
    }
-
-   return const_buffer;
+   brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
 }
 
-
 /**
- * Update the surface state for a VS constant buffer.
- * The constant buffer will be (re)allocated here if needed.
+ * Updates surface / buffer for fragment shader constant buffer, if
+ * one is required.  Consumes the constant buffer state updates and produces
+ * BRW_NEW_WM_SURFACES, picked up by prepare_wm_surfaces for the binding table.
+ *
+ * NOTE(review): brw_update_wm_constant_surface() rebuilds fp->const_buffer
+ * again, so the buffer is created twice per update -- confirm and dedupe.
  */
-static dri_bo *
-brw_update_vs_constant_surface( GLcontext *ctx,
-                                GLuint surf,
-                                dri_bo *const_buffer,
-                                const struct gl_program_parameter_list *params)
+static void prepare_wm_constant_surface(struct brw_context *brw )
 {
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_surface_key key;
-   struct intel_context *intel = &brw->intel;
-   const int size = params->NumParameters * 4 * sizeof(GLfloat);
-
-   assert(surf == 0);
-
-   /* free old const buffer if too small */
-   if (const_buffer && const_buffer->size < size) {
-      dri_bo_unreference(const_buffer);
-      const_buffer = NULL;
-   }
-
-   /* alloc new buffer if needed */
-   if (!const_buffer) {
-      const_buffer =
-         drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
-   }
-
-   memset(&key, 0, sizeof(key));
-
-   key.format = MESA_FORMAT_RGBA_FLOAT32;
-   key.internal_format = GL_RGBA;
-   key.bo = const_buffer;
-   key.depthmode = GL_NONE;
-   key.pitch = params->NumParameters;
-   key.width = params->NumParameters;
-   key.height = 1;
-   key.depth = 1;
-   key.cpp = 16;
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
 
-   /*
-   printf("%s:\n", __FUNCTION__);
-   printf("  width %d  height %d  depth %d  cpp %d  pitch %d\n",
-          key.width, key.height, key.depth, key.cpp, key.pitch);
-   */
+   drm_intel_bo_unreference(fp->const_buffer);
+   fp->const_buffer = brw_wm_update_constant_buffer(brw);
 
-   dri_bo_unreference(brw->vs.surf_bo[surf]);
-   brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
-                                            &key, sizeof(key),
-                                            &key.bo, key.bo ? 1 : 0,
-                                            NULL);
-   if (brw->vs.surf_bo[surf] == NULL) {
-      brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it.
+    */
+   if (fp->const_buffer == 0) {
+      if (brw->wm.surf_bo[surf] != NULL) {
+	 drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+	 brw->wm.surf_bo[surf] = NULL;
+	 brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+      }
+      return;
    }
 
-   return const_buffer;
+   brw_update_wm_constant_surface(ctx, surf);
 }
 
+const struct brw_tracked_state brw_wm_constant_surface = { /* state atom for the WM constant buffer */
+   .dirty = {
+      .mesa = (_NEW_PROGRAM_CONSTANTS), /* parameter values are uploaded into the buffer */
+      .brw = (BRW_NEW_FRAGMENT_PROGRAM), /* fp->use_const_buffer decides if a buffer exists */
+      .cache = 0
+   },
+   .prepare = prepare_wm_constant_surface,
+};
+
 
 /**
  * Sets up a surface state structure to point at the given region.
@@ -507,7 +505,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
 static void
 brw_update_renderbuffer_surface(struct brw_context *brw,
 				struct gl_renderbuffer *rb,
-				unsigned int unit, GLboolean cached)
+				unsigned int unit)
 {
    GLcontext *ctx = &brw->intel.ctx;
    dri_bo *region_bo = NULL;
@@ -567,12 +565,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
 		      ctx->Color.BlendEnabled);
 
    dri_bo_unreference(brw->wm.surf_bo[unit]);
-   brw->wm.surf_bo[unit] = NULL;
-   if (cached) 
-       brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
-	       &key, sizeof(key),
-	       &region_bo, 1,
-	       NULL);
+   brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
+					    BRW_SS_SURFACE,
+					    &key, sizeof(key),
+					    &region_bo, 1,
+					    NULL);
 
    if (brw->wm.surf_bo[unit] == NULL) {
       struct brw_surface_state surf;
@@ -598,7 +595,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
       surf.ss0.writedisable_alpha = !key.color_mask[3];
 
       /* Key size will never match key size for textures, so we're safe. */
-      brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+      brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
+                                               BRW_SS_SURFACE,
                                                &key, sizeof(key),
 					       &region_bo, 1,
 					       &surf, sizeof(surf),
@@ -630,7 +628,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
 
    assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
 
-   bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
+   bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
 			      NULL, 0,
 			      brw->wm.surf_bo, brw->wm.nr_surfaces,
 			      NULL);
@@ -646,7 +644,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
          else
             data[i] = 0;
 
-      bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
+      bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
 				  NULL, 0,
 				  brw->wm.surf_bo, brw->wm.nr_surfaces,
 				  data, data_size,
@@ -682,27 +680,17 @@ static void prepare_wm_surfaces(struct brw_context *brw )
       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
          brw_update_renderbuffer_surface(brw,
 					 ctx->DrawBuffer->_ColorDrawBuffers[i],
-					 i,
-					 GL_FALSE);
+					 i);
       }
    } else {
-      brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE);
+      brw_update_renderbuffer_surface(brw, NULL, 0);
    }
 
    old_nr_surfaces = brw->wm.nr_surfaces;
    brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
 
-   /* Update surface / buffer for fragment shader constant buffer */
-   {
-      const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
-      struct brw_fragment_program *fp =
-         (struct brw_fragment_program *) brw->fragment_program;
-      fp->const_buffer =
-         brw_update_wm_constant_surface(ctx, surf, fp->const_buffer,
-                                     fp->program.Base.Parameters);
-
-      brw->wm.nr_surfaces = surf + 1;
-   }
+   if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
+       brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
 
    /* Update surfaces for textures */
    for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
@@ -735,100 +723,16 @@ static void prepare_wm_surfaces(struct brw_context *brw )
       brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
 }
 
-
-/**
- * Constructs the binding table for the VS surface state.
- */
-static dri_bo *
-brw_vs_get_binding_table(struct brw_context *brw)
-{
-   dri_bo *bind_bo;
-
-   assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF);
-
-   bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
-			      NULL, 0,
-			      brw->vs.surf_bo, brw->vs.nr_surfaces,
-			      NULL);
-
-   if (bind_bo == NULL) {
-      GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint);
-      uint32_t *data = malloc(data_size);
-      int i;
-
-      for (i = 0; i < brw->vs.nr_surfaces; i++)
-         if (brw->vs.surf_bo[i])
-            data[i] = brw->vs.surf_bo[i]->offset;
-         else
-            data[i] = 0;
-
-      bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
-				  NULL, 0,
-				  brw->vs.surf_bo, brw->vs.nr_surfaces,
-				  data, data_size,
-				  NULL, NULL);
-
-      /* Emit binding table relocations to surface state */
-      for (i = 0; i < BRW_VS_MAX_SURF; i++) {
-	 if (brw->vs.surf_bo[i] != NULL) {
-	    dri_bo_emit_reloc(bind_bo,
-			      I915_GEM_DOMAIN_INSTRUCTION, 0,
-			      0,
-			      i * sizeof(GLuint),
-			      brw->vs.surf_bo[i]);
-	 }
-      }
-
-      free(data);
-   }
-
-   return bind_bo;
-}
-
-
-/**
- * Vertex shader surfaces.  Just constant buffer for now.  Could add vertex 
- * shader textures in the future.
- */
-static void prepare_vs_surfaces(struct brw_context *brw )
-{
-   GLcontext *ctx = &brw->intel.ctx;
-
-   /* Update surface / buffer for vertex shader constant buffer */
-   {
-      const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
-      struct brw_vertex_program *vp =
-         (struct brw_vertex_program *) brw->vertex_program;
-      vp->const_buffer =
-         brw_update_vs_constant_surface(ctx, surf, vp->const_buffer,
-                                        vp->program.Base.Parameters);
-
-      brw->vs.nr_surfaces = 1;
-   }
-
-   dri_bo_unreference(brw->vs.bind_bo);
-   brw->vs.bind_bo = brw_vs_get_binding_table(brw);
-
-   if (1)
-      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
-}
-
-
-static void
-prepare_surfaces(struct brw_context *brw)
-{
-   prepare_wm_surfaces(brw);
-   prepare_vs_surfaces(brw);
-}
-
-
 const struct brw_tracked_state brw_wm_surfaces = {
    .dirty = {
-      .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM,
-      .brw = BRW_NEW_CONTEXT,
+      .mesa = (_NEW_COLOR |
+               _NEW_TEXTURE |
+               _NEW_BUFFERS),
+      .brw = (BRW_NEW_CONTEXT |
+	      BRW_NEW_WM_SURFACES),
       .cache = 0
    },
-   .prepare = prepare_surfaces,
+   .prepare = prepare_wm_surfaces,
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c
new file mode 120000
index 00000000000..4c6b37ada01
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_generatemipmap.c
@@ -0,0 +1 @@
+../intel/intel_generatemipmap.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index c849e4869e5..0db1f392c0a 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -35,6 +35,9 @@
 #include "intel_batchbuffer.h"
 #include "intel_regions.h"
 
+static GLboolean
+intel_bufferobj_unmap(GLcontext * ctx,
+                      GLenum target, struct gl_buffer_object *obj);
 
 /** Allocates a new dri_bo to store the data for the buffer object. */
 static void
@@ -100,7 +103,13 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj)
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
    assert(intel_obj);
-   assert(!obj->Pointer); /* Mesa should have unmapped it */
+
+   /* Buffer objects are automatically unmapped when deleting according
+    * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy
+    * (though it does if you call glDeleteBuffers)
+    */
+   if (obj->Pointer)
+      intel_bufferobj_unmap(ctx, 0, obj);
 
    if (intel_obj->region) {
       intel_bufferobj_release_region(intel, intel_obj);
@@ -205,6 +214,7 @@ intel_bufferobj_map(GLcontext * ctx,
    struct intel_context *intel = intel_context(ctx);
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
    GLboolean read_only = (access == GL_READ_ONLY_ARB);
+   GLboolean write_only = (access == GL_WRITE_ONLY_ARB);
 
    assert(intel_obj);
 
@@ -216,7 +226,14 @@ intel_bufferobj_map(GLcontext * ctx,
       return NULL;
    }
 
-   dri_bo_map(intel_obj->buffer, !read_only);
+   if (write_only && intel->intelScreen->kernel_exec_fencing) {
+      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
+      intel_obj->mapped_gtt = GL_TRUE;
+   } else {
+      drm_intel_bo_map(intel_obj->buffer, !read_only);
+      intel_obj->mapped_gtt = GL_FALSE;
+   }
+
    obj->Pointer = intel_obj->buffer->virtual;
    return obj->Pointer;
 }
@@ -234,7 +251,11 @@ intel_bufferobj_unmap(GLcontext * ctx,
    assert(intel_obj);
    if (intel_obj->buffer != NULL) {
       assert(obj->Pointer);
-      dri_bo_unmap(intel_obj->buffer);
+      if (intel_obj->mapped_gtt) {
+	 drm_intel_gem_bo_unmap_gtt(intel_obj->buffer);
+      } else {
+	 drm_intel_bo_unmap(intel_obj->buffer);
+      }
       obj->Pointer = NULL;
    }
    return GL_TRUE;
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
index bf6dbd58f27..7ef723833c0 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
@@ -46,6 +46,7 @@ struct intel_buffer_object
    struct intel_region *region; /* Is there a zero-copy texture
                                    associated with this (pixel)
                                    buffer object? */
+   GLboolean mapped_gtt;
 };
 
 
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c
index b86cafea241..4f4ea45b74f 100644
--- a/src/mesa/drivers/dri/intel/intel_buffers.c
+++ b/src/mesa/drivers/dri/intel/intel_buffers.c
@@ -157,7 +157,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
    /* Do this here, not core Mesa, since this function is called from
     * many places within the driver.
     */
-   if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+   if (ctx->NewState & _NEW_BUFFERS) {
       /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
       _mesa_update_framebuffer(ctx);
       /* this updates the DrawBuffer's Width/Height if it's a FBO */
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 5dc3df395d1..49eadc75328 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -394,7 +394,7 @@ intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
     if (!driContext->driScreenPriv->dri2.enabled)
 	return;
 
-    if (!intel->internal_viewport_call) {
+    if (!intel->internal_viewport_call && ctx->DrawBuffer->Name == 0) {
        intel_update_renderbuffers(driContext, driContext->driDrawablePriv);
        if (driContext->driDrawablePriv != driContext->driReadablePriv)
 	  intel_update_renderbuffers(driContext, driContext->driReadablePriv);
@@ -778,13 +778,64 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
       intel->prim.vb_bo = NULL;
 
       if (release_texture_heaps) {
-         /* This share group is about to go away, free our private
-          * texture object data.
+         /* Nothing is currently done here to free texture heaps;
+          * but we're not using the texture heap utilities, so I
+          * rather think we shouldn't.  I've taken a look, and can't
+          * find any private texture data hanging around anywhere, but
+          * I'm not yet certain there isn't any at all...
           */
-         if (INTEL_DEBUG & DEBUG_TEXTURE)
+         /* if (INTEL_DEBUG & DEBUG_TEXTURE)
             fprintf(stderr, "do something to free texture heaps\n");
+          */
       }
 
+      /* XXX In intelMakeCurrent() below, the context's static regions are 
+       * referenced inside the frame buffer; it's listed as a hack,
+       * with a comment of "XXX FBO temporary fix-ups!", but
+       * as long as it's there, we should release the regions here.
+       * The do/while loop around the block is used to allow the
+       * "continue" statements inside the block to exit the block,
+       * to avoid many layers of "if" constructs.
+       */
+      do {
+         __DRIdrawablePrivate * driDrawPriv = intel->driDrawable;
+         struct intel_framebuffer *intel_fb;
+         struct intel_renderbuffer *irbDepth, *irbStencil;
+         if (!driDrawPriv) {
+            /* We're already detached from the drawable; exit this block. */
+            continue;
+         }
+         intel_fb = (struct intel_framebuffer *) driDrawPriv->driverPrivate;
+         if (!intel_fb) {
+            /* The frame buffer is already gone; exit this block. */
+            continue;
+         }
+         irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
+         irbStencil = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
+
+         /* If the regions of the frame buffer still match the regions
+          * of the context, release them.  If they've changed somehow,
+          * leave them alone.
+          */
+         if (intel_fb->color_rb[0] && intel_fb->color_rb[0]->region == intel->front_region) {
+	    intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL);
+         }
+         if (intel_fb->color_rb[1] && intel_fb->color_rb[1]->region == intel->back_region) {
+	    intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL);
+         }
+
+         if (irbDepth && irbDepth->region == intel->depth_region) {
+	    intel_renderbuffer_set_region(irbDepth, NULL);
+         }
+         /* Usually, the stencil buffer is the same as the depth buffer;
+          * but they're handled separately in MakeCurrent, so we'll
+          * handle them separately here.
+          */
+         if (irbStencil && irbStencil->region == intel->depth_region) {
+	    intel_renderbuffer_set_region(irbStencil, NULL);
+         }
+      } while (0);
+
       intel_region_release(&intel->front_region);
       intel_region_release(&intel->back_region);
       intel_region_release(&intel->depth_region);
@@ -793,6 +844,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
 
       /* free the Mesa context */
       _mesa_free_context_data(&intel->ctx);
+
+      
    }
 }
 
@@ -821,7 +874,10 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
           if (driDrawPriv != driReadPriv)
               intel_update_renderbuffers(driContextPriv, driReadPriv);
       } else {
-          /* XXX FBO temporary fix-ups! */
+          /* XXX FBO temporary fix-ups!  These are released in 
+           * intelDestroyContext(), above.  Changes here should be
+           * reflected there.
+           */
           /* if the renderbuffers don't have regions, init them from the context */
          struct intel_renderbuffer *irbDepth
             = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index f45e24ca3a1..e931e401e93 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -161,12 +161,22 @@ struct intel_context
    struct {
       struct gl_fragment_program *bitmap_fp;
       struct gl_vertex_program *passthrough_vp;
+      struct gl_buffer_object *texcoord_vbo;
 
       struct gl_fragment_program *saved_fp;
       GLboolean saved_fp_enable;
       struct gl_vertex_program *saved_vp;
       GLboolean saved_vp_enable;
 
+      struct gl_fragment_program *tex2d_fp;
+
+      GLboolean saved_texcoord_enable;
+      struct gl_buffer_object *saved_array_vbo, *saved_texcoord_vbo;
+      GLenum saved_texcoord_type;
+      GLsizei saved_texcoord_size, saved_texcoord_stride;
+      const void *saved_texcoord_ptr;
+      int saved_active_texture;
+
       GLint saved_vp_x, saved_vp_y;
       GLsizei saved_vp_width, saved_vp_height;
       GLenum saved_matrix_mode;
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 9ec1b4ec2f4..1e8b1878abe 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -48,6 +48,7 @@
 #define need_GL_EXT_point_parameters
 #define need_GL_EXT_secondary_color
 #define need_GL_EXT_stencil_two_side
+#define need_GL_APPLE_vertex_array_object
 #define need_GL_ATI_separate_stencil
 #define need_GL_ATI_envmap_bumpmap
 #define need_GL_NV_point_sprite
@@ -95,6 +96,7 @@ static const struct dri_extension card_extensions[] = {
    { "GL_EXT_texture_lod_bias",           NULL },
    { "GL_3DFX_texture_compression_FXT1",  NULL },
    { "GL_APPLE_client_storage",           NULL },
+   { "GL_APPLE_vertex_array_object",      GL_APPLE_vertex_array_object_functions},
    { "GL_MESA_pack_invert",               NULL },
    { "GL_MESA_ycbcr_texture",             NULL },
    { "GL_NV_blend_square",                NULL },
diff --git a/src/mesa/drivers/dri/intel/intel_generatemipmap.c b/src/mesa/drivers/dri/intel/intel_generatemipmap.c
new file mode 100644
index 00000000000..02804b51fa8
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_generatemipmap.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <[email protected]>
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/bufferobj.h"
+#include "main/teximage.h"
+#include "main/texenv.h"
+#include "main/texobj.h"
+#include "main/texstate.h"
+#include "main/texparam.h"
+#include "main/varray.h"
+#include "main/attrib.h"
+#include "main/enable.h"
+#include "main/buffers.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/depth.h"
+#include "main/hash.h"
+#include "main/mipmap.h"
+#include "main/blend.h"
+#include "glapi/dispatch.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_pixel.h"
+#include "intel_tex.h"
+#include "intel_mipmap_tree.h"
+
+static const char *intel_fp_tex2d =
+      "!!ARBfp1.0\n"
+      "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n"
+      "END\n";
+
+/**
+ * Renders mipmap level "level" of texture tex_name from level - 1 by drawing
+ * a width x height quad into it.  Returns GL_FALSE if the FBO is incomplete.
+ */
+static GLboolean
+intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name,
+			    int level, int width, int height)
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLfloat vertices[4][2];
+
+   /* Set to source from the previous level */
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1);
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1);
+
+   /* Set to draw into the current level */
+   _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
+				 GL_TEXTURE_2D, tex_name, level);
+   /* Choose to render to the color attachment. */
+   _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+
+   if (_mesa_CheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) !=
+       GL_FRAMEBUFFER_COMPLETE_EXT)
+      return GL_FALSE;
+
+   intel_meta_set_passthrough_transform(intel);
+
+   /* XXX: Doing it right would involve setting up the transformation to do
+    * 0-1 mapping or something, and not changing the vertex data.
+    */
+   vertices[0][0] = 0;
+   vertices[0][1] = 0;
+   vertices[1][0] = width;
+   vertices[1][1] = 0;
+   vertices[2][0] = width;
+   vertices[2][1] = height;
+   vertices[3][0] = 0;
+   vertices[3][1] = height;
+
+   _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices);
+   _mesa_Enable(GL_VERTEX_ARRAY);
+   intel_meta_set_default_texrect(intel);
+
+   CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4));
+
+   intel_meta_restore_texcoords(intel);
+   intel_meta_restore_transform(intel);
+
+   return GL_TRUE;
+}
+
+static GLboolean
+intel_generate_mipmap_2d(GLcontext *ctx,
+			 GLenum target,
+			 struct gl_texture_object *texObj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLint old_active_texture;
+   int level, max_levels, start_level, end_level;
+   GLuint fb_name;
+   GLboolean success = GL_FALSE;
+   struct gl_framebuffer *saved_fbo = NULL;
+
+   _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
+		    GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT |
+		    GL_DEPTH_BUFFER_BIT);
+   _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
+   old_active_texture = ctx->Texture.CurrentUnit;
+   _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer);
+
+   _mesa_Disable(GL_POLYGON_STIPPLE);
+   _mesa_Disable(GL_DEPTH_TEST);
+   _mesa_Disable(GL_STENCIL_TEST);
+   _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+   _mesa_DepthMask(GL_FALSE);
+
+   /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our
+    * minification.
+    */
+   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
+   _mesa_Enable(GL_TEXTURE_2D);
+   _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name);
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
+		       GL_LINEAR_MIPMAP_NEAREST);
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+
+   /* Create and bind a temporary framebuffer object to render into. */
+   _mesa_GenFramebuffersEXT(1, &fb_name);
+   _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name);
+
+   intel_meta_set_fragment_program(intel, &intel->meta.tex2d_fp,
+				   intel_fp_tex2d);
+   intel_meta_set_passthrough_vertex_program(intel);
+
+   max_levels = _mesa_max_texture_levels(ctx, texObj->Target);
+   start_level = texObj->BaseLevel;
+   end_level = texObj->MaxLevel;
+
+   /* Loop generating level+1 from level. */
+   for (level = start_level; level < end_level && level < max_levels - 1; level++) {
+      const struct gl_texture_image *srcImage;
+      int width, height;
+
+      srcImage = _mesa_select_tex_image(ctx, texObj, target, level);
+      if (srcImage->Border != 0)
+	 goto fail;
+
+      width = srcImage->Width / 2;
+      if (width < 1)
+	 width = 1;
+      height = srcImage->Height / 2;
+      if (height < 1)
+	 height = 1;
+
+      if (width == srcImage->Width &&
+	  height == srcImage->Height) {
+	 /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the
+	  * maximum texture level for the object, so break out when we've gone
+	  * over the edge.
+	  */
+	 break;
+      }
+
+      /* Make sure that there's space allocated for the target level.
+       * We could skip this if there's already space allocated and save some
+       * time.
+       */
+      _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat,
+		       width, height, 0,
+		       GL_RGBA, GL_UNSIGNED_INT, NULL);
+
+      if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1,
+				       width, height))
+	 goto fail;
+   }
+
+   success = GL_TRUE;
+
+fail:
+   intel_meta_restore_fragment_program(intel);
+   intel_meta_restore_vertex_program(intel);
+
+   _mesa_DeleteFramebuffersEXT(1, &fb_name);
+   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
+   if (saved_fbo)
+      _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name);
+   _mesa_reference_framebuffer(&saved_fbo, NULL);
+   _mesa_PopClientAttrib();
+   _mesa_PopAttrib();
+
+   return success;
+}
+
+
+/**
+ * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
+ * level).
+ *
+ * The texture object's miptree must be mapped.
+ *
+ * It would be really nice if this was just called by Mesa whenever mipmaps
+ * needed to be regenerated, rather than us having to remember to do so in
+ * each texture image modification path.
+ *
+ * A hardware render-to-texture path is tried first for GL_TEXTURE_2D.
+ */
+void
+intel_generate_mipmap(GLcontext *ctx, GLenum target,
+                      struct gl_texture_object *texObj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   int face, i;
+
+   /* HW path */
+   if (target == GL_TEXTURE_2D &&
+       ctx->Extensions.EXT_framebuffer_object &&
+       ctx->Extensions.ARB_fragment_program &&
+       ctx->Extensions.ARB_vertex_program) {
+      GLboolean success;
+
+      /* We'll be accessing this texture using GL entrypoints, which should
+       * be resilient against other access to this texture.
+       */
+      _mesa_unlock_texture(ctx, texObj);
+      success = intel_generate_mipmap_2d(ctx, target, texObj);
+      _mesa_lock_texture(ctx, texObj);
+
+      if (success)
+	 return;
+   }
+
+   /* SW path */
+   intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
+   _mesa_generate_mipmap(ctx, target, texObj);
+   intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
+
+   /* Update the level information in our private data in the new images, since
+    * it didn't get set as part of a normal TexImage path.
+    */
+   for (face = 0; face < nr_faces; face++) {
+      for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
+         struct intel_texture_image *intelImage;
+
+	 intelImage = intel_texture_image(texObj->Image[face][i]);
+	 if (intelImage == NULL)
+	    break;
+
+	 intelImage->level = i;
+	 intelImage->face = face;
+	 /* Unreference the miptree to signal that the new Data is a bare
+	  * pointer from mesa.
+	  */
+	 intel_miptree_release(intel, &intelImage->mt);
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 6e1e034e53d..f3652720ece 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -62,9 +62,10 @@ intel_miptree_create_internal(struct intel_context *intel,
    GLboolean ok;
    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
 
-   DBG("%s target %s format %s level %d..%d\n", __FUNCTION__,
+   DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
        _mesa_lookup_enum_by_nr(target),
-       _mesa_lookup_enum_by_nr(internal_format), first_level, last_level);
+       _mesa_lookup_enum_by_nr(internal_format), 
+       first_level, last_level, mt);
 
    mt->target = target_to_target(target);
    mt->internal_format = internal_format;
@@ -89,6 +90,7 @@ intel_miptree_create_internal(struct intel_context *intel,
 
    if (!ok) {
       free(mt);
+      DBG("%s not okay - returning NULL\n", __FUNCTION__);
       return NULL;
    }
 
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c
index fc0ac0b79c0..36a684b3b85 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel.c
@@ -27,9 +27,12 @@
 
 #include "main/enums.h"
 #include "main/state.h"
+#include "main/bufferobj.h"
 #include "main/context.h"
 #include "main/enable.h"
 #include "main/matrix.h"
+#include "main/texstate.h"
+#include "main/varray.h"
 #include "main/viewport.h"
 #include "swrast/swrast.h"
 #include "shader/arbprogram.h"
@@ -334,6 +337,85 @@ intel_meta_restore_fragment_program(struct intel_context *intel)
       _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB);
 }
 
+static const float default_texcoords[4][2] = { { 0.0, 0.0 },
+					       { 1.0, 0.0 },
+					       { 1.0, 1.0 },
+					       { 0.0, 1.0 } };
+
+/** Saves texcoord array 0 and sources it from a shared unit-square VBO. */
+void
+intel_meta_set_default_texrect(struct intel_context *intel)
+{
+   GLcontext *ctx = &intel->ctx;
+   struct gl_client_array *old_texcoord_array;
+
+   /* Client (not server) unit; intel_meta_restore_texcoords() restores it. */
+   intel->meta.saved_active_texture = ctx->Array.ActiveTexture;
+   if (intel->meta.saved_array_vbo == NULL) {
+      _mesa_reference_buffer_object(ctx, &intel->meta.saved_array_vbo,
+				    ctx->Array.ArrayBufferObj);
+   }
+
+   old_texcoord_array = &ctx->Array.ArrayObj->TexCoord[0];
+   intel->meta.saved_texcoord_type = old_texcoord_array->Type;
+   intel->meta.saved_texcoord_size = old_texcoord_array->Size;
+   intel->meta.saved_texcoord_stride = old_texcoord_array->Stride;
+   intel->meta.saved_texcoord_enable = old_texcoord_array->Enabled;
+   intel->meta.saved_texcoord_ptr = old_texcoord_array->Ptr;
+   _mesa_reference_buffer_object(ctx, &intel->meta.saved_texcoord_vbo,
+				 old_texcoord_array->BufferObj);
+
+   _mesa_ClientActiveTextureARB(GL_TEXTURE0);
+   if (intel->meta.texcoord_vbo == NULL) {
+      GLuint vbo_name;
+
+      _mesa_GenBuffersARB(1, &vbo_name);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, vbo_name);
+      _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(default_texcoords),
+			  default_texcoords, GL_STATIC_DRAW_ARB);
+      _mesa_reference_buffer_object(ctx, &intel->meta.texcoord_vbo,
+				    ctx->Array.ArrayBufferObj);
+   } else {
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB,
+			  intel->meta.texcoord_vbo->Name);
+   }
+   _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), NULL);
+   _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+}
+
+void
+intel_meta_restore_texcoords(struct intel_context *intel)
+{
+   GLcontext *ctx = &intel->ctx;
+
+   /* Restore the old TexCoordPointer */
+   if (intel->meta.saved_texcoord_vbo) {
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB,
+			  intel->meta.saved_texcoord_vbo->Name);
+      _mesa_reference_buffer_object(ctx, &intel->meta.saved_texcoord_vbo, NULL);
+   } else {
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
+   }
+
+   _mesa_TexCoordPointer(intel->meta.saved_texcoord_size,
+			 intel->meta.saved_texcoord_type,
+			 intel->meta.saved_texcoord_stride,
+			 intel->meta.saved_texcoord_ptr);
+   if (!intel->meta.saved_texcoord_enable)
+      _mesa_Disable(GL_TEXTURE_COORD_ARRAY);
+
+   _mesa_ClientActiveTextureARB(GL_TEXTURE0 +
+				intel->meta.saved_active_texture);
+
+   if (intel->meta.saved_array_vbo) {
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB,
+			  intel->meta.saved_array_vbo->Name);
+      _mesa_reference_buffer_object(ctx, &intel->meta.saved_array_vbo, NULL);
+   } else {
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
+   }
+}
+
 void
 intelInitPixelFuncs(struct dd_function_table *functions)
 {
@@ -355,5 +437,7 @@ intel_free_pixel_state(struct intel_context *intel)
 
    _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, NULL);
    _mesa_reference_fragprog(ctx, &intel->meta.bitmap_fp, NULL);
+   _mesa_reference_fragprog(ctx, &intel->meta.tex2d_fp, NULL);
+   _mesa_reference_buffer_object(ctx, &intel->meta.texcoord_vbo, NULL);
 }
 
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h
index cb41fa182cb..6acf0813c8c 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel.h
+++ b/src/mesa/drivers/dri/intel/intel_pixel.h
@@ -40,6 +40,9 @@ void intel_meta_set_fragment_program(struct intel_context *intel,
 				     const char *prog_string);
 void intel_meta_restore_fragment_program(struct intel_context *intel);
 void intel_free_pixel_state(struct intel_context *intel);
+/* Save/replace and restore the texcoord array 0 state around meta draws. */
+void intel_meta_set_default_texrect(struct intel_context *intel);
+void intel_meta_restore_texcoords(struct intel_context *intel);
 
 GLboolean intel_check_blit_fragment_ops(GLcontext * ctx,
 					GLboolean src_alpha_is_one);
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 1db7f5594e9..b20840b9a06 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -360,7 +360,6 @@ intel_texture_bitmap(GLcontext * ctx,
       "END\n";
    GLuint texname;
    GLfloat vertices[4][4];
-   GLfloat texcoords[4][2];
    GLint old_active_texture;
    GLubyte *unpacked_bitmap;
    GLubyte *a8_bitmap;
@@ -485,22 +484,12 @@ intel_texture_bitmap(GLcontext * ctx,
    vertices[3][2] = dst_z;
    vertices[3][3] = 1.0;
 
-   texcoords[0][0] = 0.0;
-   texcoords[0][1] = 0.0;
-   texcoords[1][0] = 1.0;
-   texcoords[1][1] = 0.0;
-   texcoords[2][0] = 1.0;
-   texcoords[2][1] = 1.0;
-   texcoords[3][0] = 0.0;
-   texcoords[3][1] = 1.0;
-
    _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices);
-   _mesa_ClientActiveTextureARB(GL_TEXTURE0);
-   _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
    _mesa_Enable(GL_VERTEX_ARRAY);
-   _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+   intel_meta_set_default_texrect(intel);
    CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4));
 
+   intel_meta_restore_texcoords(intel);
    intel_meta_restore_transform(intel);
    intel_meta_restore_fragment_program(intel);
    intel_meta_restore_vertex_program(intel);
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
index e8d5ac8569d..abcdcd5724c 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -70,7 +70,6 @@ intel_texture_drawpixels(GLcontext * ctx,
    struct intel_context *intel = intel_context(ctx);
    GLuint texname;
    GLfloat vertices[4][4];
-   GLfloat texcoords[4][2];
    GLfloat z;
    GLint old_active_texture;
    GLenum internalFormat;
@@ -169,22 +168,13 @@ intel_texture_drawpixels(GLcontext * ctx,
    vertices[3][2] = z;
    vertices[3][3] = 1.0;
 
-   texcoords[0][0] = 0.0;
-   texcoords[0][1] = 0.0;
-   texcoords[1][0] = 1.0;
-   texcoords[1][1] = 0.0;
-   texcoords[2][0] = 1.0;
-   texcoords[2][1] = 1.0;
-   texcoords[3][0] = 0.0;
-   texcoords[3][1] = 1.0;
-
    _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices);
-   _mesa_ClientActiveTextureARB(GL_TEXTURE0);
-   _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
    _mesa_Enable(GL_VERTEX_ARRAY);
-   _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+   intel_meta_set_default_texrect(intel);
+
    CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4));
 
+   intel_meta_restore_texcoords(intel);
    intel_meta_restore_transform(intel);
 
    _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
@@ -208,7 +198,6 @@ intel_stencil_drawpixels(GLcontext * ctx,
    struct intel_context *intel = intel_context(ctx);
    GLuint texname, rb_name, fb_name, old_fb_name;
    GLfloat vertices[4][2];
-   GLfloat texcoords[4][2];
    struct intel_renderbuffer *irb;
    struct intel_renderbuffer *depth_irb;
    struct gl_renderbuffer *rb;
@@ -343,7 +332,6 @@ intel_stencil_drawpixels(GLcontext * ctx,
    _mesa_free(stencil_pixels);
 
    intel_meta_set_passthrough_transform(intel);
-
    vertices[0][0] = x;
    vertices[0][1] = y;
    vertices[1][0] = x + width * ctx->Pixel.ZoomX;
@@ -353,22 +341,13 @@ intel_stencil_drawpixels(GLcontext * ctx,
    vertices[3][0] = x;
    vertices[3][1] = y + height * ctx->Pixel.ZoomY;
 
-   texcoords[0][0] = 0.0;
-   texcoords[0][1] = 0.0;
-   texcoords[1][0] = 1.0;
-   texcoords[1][1] = 0.0;
-   texcoords[2][0] = 1.0;
-   texcoords[2][1] = 1.0;
-   texcoords[3][0] = 0.0;
-   texcoords[3][1] = 1.0;
-
    _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices);
-   _mesa_ClientActiveTextureARB(GL_TEXTURE0);
-   _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
    _mesa_Enable(GL_VERTEX_ARRAY);
-   _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+   intel_meta_set_default_texrect(intel);
+
    CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4));
 
+   intel_meta_restore_texcoords(intel);
    intel_meta_restore_transform(intel);
 
    _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index 0aa5b8c02c9..534e75efe1f 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -52,12 +52,66 @@
 
 #define FILE_DEBUG_FLAG DEBUG_REGION
 
+/* This should be set to the maximum backtrace size desired.
+ * Set it to 0 to disable backtrace debugging.
+ */
+#define DEBUG_BACKTRACE_SIZE 0
+
+#if DEBUG_BACKTRACE_SIZE == 0
+/* Use the standard debug output */
+#define _DBG(...) DBG(__VA_ARGS__)
+#else
+/* Use backtracing debug output; do/while keeps it a single statement */
+#define _DBG(...) do { debug_backtrace(); DBG(__VA_ARGS__); } while (0)
+
+/* Backtracing debug support */
+#include <execinfo.h>
+
+static void
+debug_backtrace(void)
+{
+   void *trace[DEBUG_BACKTRACE_SIZE];
+   char **strings = NULL;
+   int traceSize;
+   register int i;
+
+   traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE);
+   strings = backtrace_symbols(trace, traceSize);
+   if (strings == NULL) {
+      DBG("no backtrace:");
+      return;
+   }
+
+   /* Spit out all the strings with a colon separator.  Ignore
+    * the first, since we don't really care about the call
+    * to debug_backtrace() itself.  Skip until the final "/" in
+    * the trace to avoid really long lines.
+    */
+   for (i = 1; i < traceSize; i++) {
+      char *p = strings[i], *slash = strings[i];
+      while (*p) {
+         if (*p++ == '/') {
+            slash = p;
+         }
+      }
+
+      DBG("%s:", slash);
+   }
+
+   /* Free up the memory, and we're done */
+   free(strings);
+}
+
+#endif
+
+
+
 /* XXX: Thread safety?
  */
 GLubyte *
 intel_region_map(struct intel_context *intel, struct intel_region *region)
 {
-   DBG("%s\n", __FUNCTION__);
+   _DBG("%s %p\n", __FUNCTION__, region);
    if (!region->map_refcount++) {
       if (region->pbo)
          intel_region_cow(intel, region);
@@ -72,7 +126,7 @@ intel_region_map(struct intel_context *intel, struct intel_region *region)
 void
 intel_region_unmap(struct intel_context *intel, struct intel_region *region)
 {
-   DBG("%s\n", __FUNCTION__);
+   _DBG("%s %p\n", __FUNCTION__, region);
    if (!--region->map_refcount) {
       dri_bo_unmap(region->buffer);
       region->map = NULL;
@@ -87,10 +141,10 @@ intel_region_alloc_internal(struct intel_context *intel,
 {
    struct intel_region *region;
 
-   DBG("%s\n", __FUNCTION__);
-
-   if (buffer == NULL)
+   if (buffer == NULL) {
+      _DBG("%s <-- NULL\n", __FUNCTION__);
       return NULL;
+   }
 
    region = calloc(sizeof(*region), 1);
    region->cpp = cpp;
@@ -104,6 +158,7 @@ intel_region_alloc_internal(struct intel_context *intel,
    region->tiling = I915_TILING_NONE;
    region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE;
 
+   _DBG("%s <-- %p\n", __FUNCTION__, region);
    return region;
 }
 
@@ -158,7 +213,7 @@ void
 intel_region_reference(struct intel_region **dst, struct intel_region *src)
 {
    if (src)
-      DBG("%s %p %d\n", __FUNCTION__, src, src->refcount);
+      _DBG("%s %p %d\n", __FUNCTION__, src, src->refcount);
 
    assert(*dst == NULL);
    if (src) {
@@ -172,10 +227,12 @@ intel_region_release(struct intel_region **region_handle)
 {
    struct intel_region *region = *region_handle;
 
-   if (region == NULL)
+   if (region == NULL) {
+      _DBG("%s NULL\n", __FUNCTION__);
       return;
+   }
 
-   DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1);
+   _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1);
 
    ASSERT(region->refcount > 0);
    region->refcount--;
@@ -251,7 +308,7 @@ intel_region_data(struct intel_context *intel,
 {
    GLboolean locked = GL_FALSE;
 
-   DBG("%s\n", __FUNCTION__);
+   _DBG("%s\n", __FUNCTION__);
 
    if (intel == NULL)
       return;
@@ -293,7 +350,7 @@ intel_region_copy(struct intel_context *intel,
                   GLuint src_offset,
                   GLuint srcx, GLuint srcy, GLuint width, GLuint height)
 {
-   DBG("%s\n", __FUNCTION__);
+   _DBG("%s\n", __FUNCTION__);
 
    if (intel == NULL)
       return;
@@ -326,7 +383,7 @@ intel_region_fill(struct intel_context *intel,
                   GLuint dstx, GLuint dsty,
                   GLuint width, GLuint height, GLuint color)
 {
-   DBG("%s\n", __FUNCTION__);
+   _DBG("%s\n", __FUNCTION__);
 
    if (intel == NULL)
       return;   
@@ -356,6 +413,8 @@ intel_region_attach_pbo(struct intel_context *intel,
    if (region->pbo == pbo)
       return;
 
+   _DBG("%s %p %p\n", __FUNCTION__, region, pbo);
+
    /* If there is already a pbo attached, break the cow tie now.
     * Don't call intel_region_release_pbo() as that would
     * unnecessarily allocate a new buffer we would have to immediately
@@ -385,6 +444,7 @@ void
 intel_region_release_pbo(struct intel_context *intel,
                          struct intel_region *region)
 {
+   _DBG("%s %p\n", __FUNCTION__, region);
    assert(region->buffer == region->pbo->buffer);
    region->pbo->region = NULL;
    region->pbo = NULL;
@@ -412,7 +472,7 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region)
 
    assert(region->cpp * region->pitch * region->height == pbo->Base.Size);
 
-   DBG("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size);
+   _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, pbo->Base.Size);
 
    /* Now blit from the texture buffer to the new buffer: 
     */
@@ -459,6 +519,10 @@ intel_recreate_static(struct intel_context *intel,
    if (region == NULL) {
       region = calloc(sizeof(*region), 1);
       region->refcount = 1;
+      _DBG("%s creating new region %p\n", __FUNCTION__, region);
+   }
+   else {
+      _DBG("%s %p\n", __FUNCTION__, region);
    }
 
    if (intel->ctx.Visual.rgbBits == 24)
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index ae0994b183a..fbd6e1d0c36 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -158,60 +158,6 @@ timed_memcpy(void *dest, const void *src, size_t n)
 }
 #endif /* DO_DEBUG */
 
-/**
- * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
- * level).
- *
- * The texture object's miptree must be mapped.
- *
- * It would be really nice if this was just called by Mesa whenever mipmaps
- * needed to be regenerated, rather than us having to remember to do so in
- * each texture image modification path.
- *
- * This function should also include an accelerated path.
- */
-void
-intel_generate_mipmap(GLcontext *ctx, GLenum target,
-                      struct gl_texture_object *texObj)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-   int face, i;
-
-   _mesa_generate_mipmap(ctx, target, texObj);
-
-   /* Update the level information in our private data in the new images, since
-    * it didn't get set as part of a normal TexImage path.
-    */
-   for (face = 0; face < nr_faces; face++) {
-      for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
-         struct intel_texture_image *intelImage;
-
-	 intelImage = intel_texture_image(texObj->Image[face][i]);
-	 if (intelImage == NULL)
-	    break;
-
-	 intelImage->level = i;
-	 intelImage->face = face;
-	 /* Unreference the miptree to signal that the new Data is a bare
-	  * pointer from mesa.
-	  */
-	 intel_miptree_release(intel, &intelImage->mt);
-      }
-   }
-}
-
-static void intelGenerateMipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
-   intel_generate_mipmap(ctx, target, texObj);
-   intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
-}
-
 void
 intelInitTextureFuncs(struct dd_function_table *functions)
 {
@@ -227,7 +173,7 @@ intelInitTextureFuncs(struct dd_function_table *functions)
    functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
    functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
    functions->GetTexImage = intelGetTexImage;
-   functions->GenerateMipmap = intelGenerateMipmap;
+   functions->GenerateMipmap = intel_generate_mipmap;
 
    /* compressed texture functions */
    functions->CompressedTexImage2D = intelCompressedTexImage2D;
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 08437aa0e2b..7c2b26ef1d4 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -158,7 +158,7 @@ do_copy_texsubimage(struct intel_context *intel,
 
    /* GL_SGIS_generate_mipmap */
    if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      ctx->Driver.GenerateMipmap(ctx, target, texObj);
+      intel_generate_mipmap(ctx, target, texObj);
    }
 
    return GL_TRUE;
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 1f192dafbe1..b71fe2a7ae0 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -316,7 +316,6 @@ intelTexImage(GLcontext * ctx,
    GLint postConvHeight = height;
    GLint texelBytes, sizeInBytes;
    GLuint dstRowStride = 0, srcRowStride = texImage->RowStride;
-   GLboolean needs_map;
 
    DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
        _mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
@@ -482,15 +481,8 @@ intelTexImage(GLcontext * ctx,
 
    LOCK_HARDWARE(intel);
 
-   /* Two cases where we need a mapping of the miptree: when the user supplied
-    * data is mapped as well (non-PBO, memcpy upload) or when we're going to do
-    * (software) mipmap generation.
-    */
-   needs_map = (pixels != NULL) || (level == texObj->BaseLevel &&
-				  texObj->GenerateMipmap);
-
    if (intelImage->mt) {
-      if (needs_map)
+      if (pixels != NULL)
          texImage->Data = intel_miptree_image_map(intel,
                                                   intelImage->mt,
                                                   intelImage->face,
@@ -547,22 +539,22 @@ intelTexImage(GLcontext * ctx,
 						   format, type, pixels, unpack)) {
 	   _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
        }
-
-       /* GL_SGIS_generate_mipmap */
-       if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-	  intel_generate_mipmap(ctx, target, texObj);
-       }
    }
 
    _mesa_unmap_teximage_pbo(ctx, unpack);
 
    if (intelImage->mt) {
-      if (needs_map)
+      if (pixels != NULL)
          intel_miptree_image_unmap(intel, intelImage->mt);
       texImage->Data = NULL;
    }
 
    UNLOCK_HARDWARE(intel);
+
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target, texObj);
+   }
 }
 
 void
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index f86de568976..48104de2a9b 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -101,11 +101,6 @@ intelTexSubimage(GLcontext * ctx,
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
    }
 
-   /* GL_SGIS_generate_mipmap */
-   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      intel_generate_mipmap(ctx, target, texObj);
-   }
-
    _mesa_unmap_teximage_pbo(ctx, packing);
 
    if (intelImage->mt) {
@@ -114,6 +109,11 @@ intelTexSubimage(GLcontext * ctx,
    }
 
    UNLOCK_HARDWARE(intel);
+
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target, texObj);
+   }
 }
 
 
diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
index 2fcc87c0f5a..81ee1ed022c 100644
--- a/src/mesa/drivers/dri/r200/r200_state.c
+++ b/src/mesa/drivers/dri/r200/r200_state.c
@@ -2480,11 +2480,11 @@ void r200ValidateState( GLcontext *ctx )
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    GLuint new_state = rmesa->NewGLState;
 
-   if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+   if (new_state & _NEW_BUFFERS) {
      r200UpdateDrawBuffer(ctx);
    }
 
-   if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
+   if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) {
       r200UpdateTextureState( ctx );
       new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
       r200UpdateLocalViewer( ctx );
@@ -2523,6 +2523,7 @@ void r200ValidateState( GLcontext *ctx )
    }
 
    if (new_state & (_NEW_PROGRAM|
+                    _NEW_PROGRAM_CONSTANTS |
    /* need to test for pretty much anything due to possible parameter bindings */
 	_NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM|
 	_NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX|
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index 873cde44144..2f45429cf27 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -470,7 +470,8 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
 			fp->translated = GL_TRUE;
 		if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
 			r300FragmentProgramDump(fp, &fp->code);
-		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
+		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM |
+                                          _NEW_PROGRAM_CONSTANTS);
 	}
 
 	update_params(r300, fp);
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 79f0b3625ca..07299f3b36f 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1109,7 +1109,7 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
 	struct gl_program_parameter_list *paramList;
 	GLuint i;
 
-	if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM)))
+	if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
 		return;
 
 	fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
@@ -2357,11 +2357,12 @@ void r300UpdateShaders(r300ContextPtr rmesa)
 			hw_tcl_on = future_hw_tcl_on = 0;
 			r300ResetHwState(rmesa);
 
-			r300UpdateStateParameters(ctx, _NEW_PROGRAM);
+			r300UpdateStateParameters(ctx, _NEW_PROGRAM |
+                                                  _NEW_PROGRAM_CONSTANTS);
 			return;
 		}
 	}
-	r300UpdateStateParameters(ctx, _NEW_PROGRAM);
+	r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
 }
 
 static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
@@ -2588,7 +2589,7 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
 	_tnl_InvalidateState(ctx, new_state);
 	_ae_invalidate_state(ctx, new_state);
 
-	if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+	if (new_state & _NEW_BUFFERS) {
 		r300UpdateDrawBuffer(ctx);
 	}
 
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
index 292573de893..300559d0b4d 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
@@ -501,7 +501,8 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
 
 		_mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0);
 
-		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
+		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM |
+                                          _NEW_PROGRAM_CONSTANTS);
 
 		if (RADEON_DEBUG & DEBUG_PIXEL) {
 			if (fp->translated) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
index b6561001e76..4432f85691e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -2255,7 +2255,7 @@ void radeonValidateState( GLcontext *ctx )
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    GLuint new_state = rmesa->NewGLState;
 
-   if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+   if (new_state & _NEW_BUFFERS) {
      radeonUpdateDrawBuffer(ctx);
    }
 
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 305df548fa2..9a01465bdf9 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -912,8 +912,9 @@ xmesa_update_state( GLcontext *ctx, GLbitfield new_state )
    /*
     * GL_DITHER, GL_READ/DRAW_BUFFER, buffer binding state, etc. effect
     * renderbuffer span/clear funcs.
+    * Check _NEW_COLOR to detect dither enable/disable.
     */
-   if (new_state & (_NEW_COLOR | _NEW_PIXEL | _NEW_BUFFERS)) {
+   if (new_state & (_NEW_COLOR | _NEW_BUFFERS)) {
       XMesaBuffer xmbuf = XMESA_BUFFER(ctx->DrawBuffer);
       struct xmesa_renderbuffer *front_xrb, *back_xrb;
 
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 42d1e579e08..d5c604c56a2 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -30,6 +30,26 @@
 #include "state.h"
 
 
+
+/**
+ * \return  size in bytes of an array of [count] indices of the given type.
+ */
+static GLsizei
+index_bytes(GLenum type, GLsizei count)
+{
+   if (type == GL_UNSIGNED_INT) {
+      return count * sizeof(GLuint);
+   }
+   else if (type == GL_UNSIGNED_BYTE) {
+      return count * sizeof(GLubyte);
+   }
+   else {
+      ASSERT(type == GL_UNSIGNED_SHORT);
+      return count * sizeof(GLushort);
+   }
+}
+
+
 /**
  * Find the max index in the given element/index buffer
  */
@@ -44,10 +64,8 @@ max_buffer_index(GLcontext *ctx, GLuint count, GLenum type,
 
    if (elementBuf->Name) {
       /* elements are in a user-defined buffer object.  need to map it */
-      map = ctx->Driver.MapBuffer(ctx,
-                                  GL_ELEMENT_ARRAY_BUFFER_ARB,
-                                  GL_READ_ONLY,
-                                  elementBuf);
+      map = ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER,
+                                  GL_READ_ONLY, elementBuf);
       /* Actual address is the sum of pointers */
       indices = (const GLvoid *) ADD_POINTERS(map, (const GLubyte *) indices);
    }
@@ -70,14 +88,16 @@ max_buffer_index(GLcontext *ctx, GLuint count, GLenum type,
    }
 
    if (map) {
-      ctx->Driver.UnmapBuffer(ctx,
-                              GL_ELEMENT_ARRAY_BUFFER_ARB,
-                              ctx->Array.ElementArrayBufferObj);
+      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuf);
    }
 
    return max;
 }
 
+
+/**
+ * Check if OK to render by examining framebuffer status and vertex arrays.
+ */
 static GLboolean
 check_valid_to_render(GLcontext *ctx, char *function)
 {
@@ -105,6 +125,12 @@ check_valid_to_render(GLcontext *ctx, char *function)
    return GL_TRUE;
 }
 
+
+/**
+ * Error checking for glDrawElements().  Includes parameter checking
+ * and VBO bounds checking.
+ * \return GL_TRUE if OK to render, GL_FALSE if error found
+ */
 GLboolean
 _mesa_validate_DrawElements(GLcontext *ctx,
 			    GLenum mode, GLsizei count, GLenum type,
@@ -140,27 +166,8 @@ _mesa_validate_DrawElements(GLcontext *ctx,
    /* Vertex buffer object tests */
    if (ctx->Array.ElementArrayBufferObj->Name) {
       /* use indices in the buffer object */
-      GLuint indexBytes;
-
-      if (!ctx->Array.ElementArrayBufferObj->Size) {
-         _mesa_warning(ctx,
-                       "glDrawElements called with empty array elements buffer");
-         return GL_FALSE;
-      }
-
-      if (type == GL_UNSIGNED_INT) {
-         indexBytes = count * sizeof(GLuint);
-      }
-      else if (type == GL_UNSIGNED_BYTE) {
-         indexBytes = count * sizeof(GLubyte);
-      }
-      else {
-         ASSERT(type == GL_UNSIGNED_SHORT);
-         indexBytes = count * sizeof(GLushort);
-      }
-
       /* make sure count doesn't go outside buffer bounds */
-      if (indexBytes > (GLuint) ctx->Array.ElementArrayBufferObj->Size) {
+      if (index_bytes(type, count) > ctx->Array.ElementArrayBufferObj->Size) {
          _mesa_warning(ctx, "glDrawElements index out of buffer bounds");
          return GL_FALSE;
       }
@@ -177,6 +184,8 @@ _mesa_validate_DrawElements(GLcontext *ctx,
                                     ctx->Array.ElementArrayBufferObj);
       if (max >= ctx->Array._MaxElement) {
          /* the max element is out of bounds of one or more enabled arrays */
+         _mesa_warning(ctx, "glDrawElements() index=%u is "
+                       "out of bounds (max=%u)", max, ctx->Array._MaxElement);
          return GL_FALSE;
       }
    }
@@ -184,6 +193,12 @@ _mesa_validate_DrawElements(GLcontext *ctx,
    return GL_TRUE;
 }
 
+
+/**
+ * Error checking for glDrawRangeElements().  Includes parameter checking
+ * and VBO bounds checking.
+ * \return GL_TRUE if OK to render, GL_FALSE if error found
+ */
 GLboolean
 _mesa_validate_DrawRangeElements(GLcontext *ctx, GLenum mode,
 				 GLuint start, GLuint end,
@@ -224,21 +239,8 @@ _mesa_validate_DrawRangeElements(GLcontext *ctx, GLenum mode,
    /* Vertex buffer object tests */
    if (ctx->Array.ElementArrayBufferObj->Name) {
       /* use indices in the buffer object */
-      GLuint indexBytes;
-
-      if (type == GL_UNSIGNED_INT) {
-         indexBytes = count * sizeof(GLuint);
-      }
-      else if (type == GL_UNSIGNED_BYTE) {
-         indexBytes = count * sizeof(GLubyte);
-      }
-      else {
-         ASSERT(type == GL_UNSIGNED_SHORT);
-         indexBytes = count * sizeof(GLushort);
-      }
-
       /* make sure count doesn't go outside buffer bounds */
-      if (indexBytes > ctx->Array.ElementArrayBufferObj->Size) {
+      if (index_bytes(type, count) > ctx->Array.ElementArrayBufferObj->Size) {
          _mesa_warning(ctx, "glDrawRangeElements index out of buffer bounds");
          return GL_FALSE;
       }
@@ -265,6 +267,7 @@ _mesa_validate_DrawRangeElements(GLcontext *ctx, GLenum mode,
 /**
  * Called from the tnl module to error check the function parameters and
  * verify that we really can draw something.
+ * \return GL_TRUE if OK to render, GL_FALSE if error found
  */
 GLboolean
 _mesa_validate_DrawArrays(GLcontext *ctx,
diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index 0fa5f0de551..8ec73b9526c 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -198,7 +198,8 @@ init_array(GLcontext *ctx,
    array->Normalized = GL_FALSE;
 #if FEATURE_ARB_vertex_buffer_object
    /* Vertex array buffers */
-   array->BufferObj = ctx->Array.NullBufferObj;
+   _mesa_reference_buffer_object(ctx, &array->BufferObj,
+                                 ctx->Shared->NullBufferObj);
 #endif
 }
 
@@ -242,8 +243,8 @@ _mesa_initialize_array_object( GLcontext *ctx,
 /**
  * Add the given array object to the array object pool.
  */
-void
-_mesa_save_array_object( GLcontext *ctx, struct gl_array_object *obj )
+static void
+save_array_object( GLcontext *ctx, struct gl_array_object *obj )
 {
    if (obj->Name > 0) {
       /* insert into hash table */
@@ -256,8 +257,8 @@ _mesa_save_array_object( GLcontext *ctx, struct gl_array_object *obj )
  * Remove the given array object from the array object pool.
  * Do not deallocate the array object though.
  */
-void
-_mesa_remove_array_object( GLcontext *ctx, struct gl_array_object *obj )
+static void
+remove_array_object( GLcontext *ctx, struct gl_array_object *obj )
 {
    if (obj->Name > 0) {
       /* remove from hash table */
@@ -311,7 +312,7 @@ _mesa_BindVertexArrayAPPLE( GLuint id )
             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindVertexArrayAPPLE");
             return;
          }
-         _mesa_save_array_object(ctx, newObj);
+         save_array_object(ctx, newObj);
       }
    }
 
@@ -360,7 +361,7 @@ _mesa_DeleteVertexArraysAPPLE(GLsizei n, const GLuint *ids)
 	 }
 
 	 /* The ID is immediately freed for re-use */
-	 _mesa_remove_array_object(ctx, obj);
+	 remove_array_object(ctx, obj);
 
          /* Unreference the array object. 
           * If refcount hits zero, the object will be deleted.
@@ -414,7 +415,7 @@ _mesa_GenVertexArraysAPPLE(GLsizei n, GLuint *arrays)
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenVertexArraysAPPLE");
          return;
       }
-      _mesa_save_array_object(ctx, obj);
+      save_array_object(ctx, obj);
       arrays[i] = first + i;
    }
 
diff --git a/src/mesa/main/arrayobj.h b/src/mesa/main/arrayobj.h
index 9c4036af5a7..90c2aea1555 100644
--- a/src/mesa/main/arrayobj.h
+++ b/src/mesa/main/arrayobj.h
@@ -56,12 +56,6 @@ extern void
 _mesa_initialize_array_object( GLcontext *ctx,
                                struct gl_array_object *obj, GLuint name );
 
-extern void
-_mesa_save_array_object( GLcontext *ctx, struct gl_array_object *obj );
-
-extern void
-_mesa_remove_array_object( GLcontext *ctx, struct gl_array_object *obj );
-
 
 
 /*
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index c8d160baa9a..1f2070ef473 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -194,7 +194,7 @@ _mesa_reference_buffer_object(GLcontext *ctx,
       return;
 
    if (*ptr) {
-      /* Unreference the old texture */
+      /* Unreference the old buffer */
       GLboolean deleteFlag = GL_FALSE;
       struct gl_buffer_object *oldObj = *ptr;
 
@@ -227,7 +227,7 @@ _mesa_reference_buffer_object(GLcontext *ctx,
    ASSERT(!*ptr);
 
    if (bufObj) {
-      /* reference new texture */
+      /* reference new buffer */
       /*_glthread_LOCK_MUTEX(tex->Mutex);*/
       if (bufObj->RefCount == 0) {
          /* this buffer's being deleted (look just above) */
@@ -389,7 +389,6 @@ _mesa_buffer_map( GLcontext *ctx, GLenum target, GLenum access,
    (void) ctx;
    (void) target;
    (void) access;
-   ASSERT(!bufObj->OnCard);
    /* Just return a direct pointer to the data */
    if (bufObj->Pointer) {
       /* already mapped! */
@@ -413,7 +412,6 @@ _mesa_buffer_unmap( GLcontext *ctx, GLenum target,
 {
    (void) ctx;
    (void) target;
-   ASSERT(!bufObj->OnCard);
    /* XXX we might assert here that bufObj->Pointer is non-null */
    bufObj->Pointer = NULL;
    return GL_TRUE;
@@ -426,16 +424,8 @@ _mesa_buffer_unmap( GLcontext *ctx, GLenum target,
 void
 _mesa_init_buffer_objects( GLcontext *ctx )
 {
-   /* Allocate the default buffer object and set refcount so high that
-    * it never gets deleted.
-    * XXX with recent/improved refcounting this may not longer be needed.
-    */
-   ctx->Array.NullBufferObj = _mesa_new_buffer_object(ctx, 0, 0);
-   if (ctx->Array.NullBufferObj)
-      ctx->Array.NullBufferObj->RefCount = 1000;
-
-   ctx->Array.ArrayBufferObj = ctx->Array.NullBufferObj;
-   ctx->Array.ElementArrayBufferObj = ctx->Array.NullBufferObj;
+   ctx->Array.ArrayBufferObj = ctx->Shared->NullBufferObj;
+   ctx->Array.ElementArrayBufferObj = ctx->Shared->NullBufferObj;
 }
 
 
@@ -479,7 +469,7 @@ bind_buffer_object(GLcontext *ctx, GLenum target, GLuint buffer)
       /* The spec says there's not a buffer object named 0, but we use
        * one internally because it simplifies things.
        */
-      newBufObj = ctx->Array.NullBufferObj;
+      newBufObj = ctx->Shared->NullBufferObj;
    }
    else {
       /* non-default buffer object */
@@ -746,7 +736,7 @@ unbind(GLcontext *ctx,
        struct gl_buffer_object *obj)
 {
    if (*ptr == obj) {
-      _mesa_reference_buffer_object(ctx, ptr, ctx->Array.NullBufferObj);
+      _mesa_reference_buffer_object(ctx, ptr, ctx->Shared->NullBufferObj);
    }
 }
 
@@ -958,8 +948,12 @@ _mesa_BufferDataARB(GLenum target, GLsizeiptrARB size,
       bufObj->Pointer = NULL;
    }  
 
+   FLUSH_VERTICES(ctx, _NEW_BUFFER_OBJECT);
+
    ASSERT(ctx->Driver.BufferData);
 
+   bufObj->Written = GL_TRUE;
+
    /* Give the buffer object to the driver!  <data> may be null! */
    ctx->Driver.BufferData( ctx, target, size, data, usage, bufObj );
 }
@@ -980,6 +974,8 @@ _mesa_BufferSubDataARB(GLenum target, GLintptrARB offset,
       return;
    }
 
+   bufObj->Written = GL_TRUE;
+
    ASSERT(ctx->Driver.BufferSubData);
    ctx->Driver.BufferSubData( ctx, target, offset, size, data, bufObj );
 }
@@ -1044,6 +1040,8 @@ _mesa_MapBufferARB(GLenum target, GLenum access)
    }
 
    bufObj->Access = access;
+   if (access == GL_WRITE_ONLY_ARB || access == GL_READ_WRITE_ARB)
+      bufObj->Written = GL_TRUE;
 
    return bufObj->Pointer;
 }
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index c5f13345f04..d8b5f3b1f4a 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -443,7 +443,7 @@ _mesa_readbuffer(GLcontext *ctx, GLenum buffer, GLint bufferIndex)
    fb->ColorReadBuffer = buffer;
    fb->_ColorReadBufferIndex = bufferIndex;
 
-   ctx->NewState |= _NEW_PIXEL;
+   ctx->NewState |= _NEW_BUFFERS;
 }
 
 
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 2a9fdf9ca05..888e08ff934 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -187,7 +187,7 @@
 #define MAX_PROGRAM_TEMPS 256
 #define MAX_PROGRAM_ADDRESS_REGS 2
 #define MAX_UNIFORMS 1024  /**< number of vec4 uniforms */
-#define MAX_VARYING 8      /**< number of float[4] vectors */
+#define MAX_VARYING 16     /**< number of float[4] vectors */
 #define MAX_SAMPLERS MAX_TEXTURE_IMAGE_UNITS
 #define MAX_PROGRAM_INPUTS 32
 #define MAX_PROGRAM_OUTPUTS 32
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index ec0dc12a3eb..5e0f2d7b1bd 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -602,6 +602,10 @@ _mesa_init_constants(GLcontext *ctx)
    ASSERT(MAX_NV_VERTEX_PROGRAM_TEMPS <= MAX_PROGRAM_TEMPS);
    ASSERT(MAX_NV_VERTEX_PROGRAM_INPUTS <= VERT_ATTRIB_MAX);
    ASSERT(MAX_NV_VERTEX_PROGRAM_OUTPUTS <= VERT_RESULT_MAX);
+
+   /* check that we don't exceed various 32-bit bitfields */
+   ASSERT(VERT_RESULT_MAX <= 32);
+   ASSERT(FRAG_ATTRIB_MAX <= 32);
 }
 
 
@@ -1007,10 +1011,6 @@ _mesa_free_context_data( GLcontext *ctx )
 
    _mesa_delete_array_object(ctx, ctx->Array.DefaultArrayObj);
 
-#if FEATURE_ARB_vertex_buffer_object
-   _mesa_delete_buffer_object(ctx, ctx->Array.NullBufferObj);
-#endif
-
    /* free dispatch tables */
    _mesa_free(ctx->Exec);
    _mesa_free(ctx->Save);
diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index f432be183cb..2e7baa48ff5 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -222,14 +222,16 @@ get_texcoord_unit(GLcontext *ctx)
 
 /**
  * Helper function to enable or disable a texture target.
+ * \param texBit  one of the TEXTURE_x_BIT values
+ * \return GL_TRUE if state is changing or GL_FALSE if no change
  */
 static GLboolean
-enable_texture(GLcontext *ctx, GLboolean state, GLbitfield bit)
+enable_texture(GLcontext *ctx, GLboolean state, GLbitfield texBit)
 {
    const GLuint curr = ctx->Texture.CurrentUnit;
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[curr];
-   const GLuint newenabled = (!state)
-       ? (texUnit->Enabled & ~bit) :  (texUnit->Enabled | bit);
+   const GLbitfield newenabled = state
+      ? (texUnit->Enabled | texBit) : (texUnit->Enabled & ~texBit);
 
    if (!ctx->DrawBuffer->Visual.rgbMode || texUnit->Enabled == newenabled)
        return GL_FALSE;
diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c
index ddae456fa12..ea76ed04e4f 100644
--- a/src/mesa/main/image.c
+++ b/src/mesa/main/image.c
@@ -181,6 +181,8 @@ _mesa_sizeof_type( GLenum type )
 	 return sizeof(GLint);
       case GL_FLOAT:
 	 return sizeof(GLfloat);
+      case GL_DOUBLE:
+	 return sizeof(GLdouble);
       case GL_HALF_FLOAT_ARB:
 	 return sizeof(GLhalfARB);
       default:
diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index bc8658beffe..47db2acdf06 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -1478,9 +1478,12 @@ next_mipmap_level_size(GLenum target, GLint border,
 
 
 /**
- * For GL_SGIX_generate_mipmap:
- * Generate a complete set of mipmaps from texObj's base-level image.
+ * Automatic mipmap generation.
+ * This is the fallback/default function for ctx->Driver.GenerateMipmap().
+ * Generate a complete set of mipmaps from texObj's BaseLevel image.
  * Stop at texObj's MaxLevel or when we get to the 1x1 texture.
+ * For cube maps, target will be one of
+ * GL_TEXTURE_CUBE_MAP_POSITIVE/NEGATIVE_X/Y/Z; never GL_TEXTURE_CUBE_MAP.
  */
 void
 _mesa_generate_mipmap(GLcontext *ctx, GLenum target,
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 50dc2def87f..d11df535f24 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1028,7 +1028,7 @@ struct gl_stencil_attrib
 
 /**
  * An index for each type of texture object.  These correspond to the GL
- * target target enums, such as GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, etc.
+ * texture target enums, such as GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, etc.
  * Note: the order is from highest priority to lowest priority.
  */
 typedef enum
@@ -1505,7 +1505,7 @@ struct gl_buffer_object
    GLsizeiptr Length;        /**< mapped length */
    GLsizeiptrARB Size;       /**< Size of storage in bytes */
    GLubyte *Data;            /**< Location of storage either in RAM or VRAM. */
-   GLboolean OnCard;         /**< Is buffer in VRAM? (hardware drivers) */
+   GLboolean Written;        /**< Ever written to? (for debugging) */
 };
 
 
@@ -1541,10 +1541,10 @@ struct gl_client_array
    const GLubyte *Ptr;          /**< Points to array data */
    GLboolean Enabled;		/**< Enabled flag is a boolean */
    GLboolean Normalized;        /**< GL_ARB_vertex_program */
+   GLuint _ElementSize;         /**< size of each element in bytes */
 
-   /**< GL_ARB_vertex_buffer_object */
-   struct gl_buffer_object *BufferObj;
-   GLuint _MaxElement;
+   struct gl_buffer_object *BufferObj;/**< GL_ARB_vertex_buffer_object */
+   GLuint _MaxElement;          /**< max element index into array buffer */
 };
 
 
@@ -1586,7 +1586,10 @@ struct gl_array_object
  */
 struct gl_array_attrib
 {
+   /** Currently bound array object. See _mesa_BindVertexArrayAPPLE() */
    struct gl_array_object *ArrayObj;
+
+   /** The default vertex array object */
    struct gl_array_object *DefaultArrayObj;
 
    GLint ActiveTexture;		/**< Client Active Texture */
@@ -1596,7 +1599,6 @@ struct gl_array_attrib
    GLbitfield NewState;		/**< mask of _NEW_ARRAY_* values */
 
 #if FEATURE_ARB_vertex_buffer_object
-   struct gl_buffer_object *NullBufferObj;
    struct gl_buffer_object *ArrayBufferObj;
    struct gl_buffer_object *ElementArrayBufferObj;
 #endif
@@ -2053,6 +2055,9 @@ struct gl_shared_state
    /** Default texture objects (shared by all texture units) */
    struct gl_texture_object *DefaultTex[NUM_TEXTURE_TARGETS];
 
+   /** Fallback texture used when a bound texture is incomplete */
+   struct gl_texture_object *FallbackTex;
+
    /**
     * \name Thread safety and statechange notification for texture
     * objects. 
@@ -2064,6 +2069,8 @@ struct gl_shared_state
    GLuint TextureStateStamp;	        /**< state notification for shared tex */
    /*@}*/
 
+   /** Default buffer object for vertex arrays that aren't in VBOs */
+   struct gl_buffer_object *NullBufferObj;
 
    /**
     * \name Vertex/fragment programs
@@ -2619,6 +2626,7 @@ struct gl_matrix_stack
 #define _NEW_PROGRAM            0x8000000  /**< __GLcontextRec::VertexProgram */
 #define _NEW_CURRENT_ATTRIB     0x10000000  /**< __GLcontextRec::Current */
 #define _NEW_PROGRAM_CONSTANTS  0x20000000
+#define _NEW_BUFFER_OBJECT      0x40000000
 #define _NEW_ALL ~0
 /*@}*/
 
diff --git a/src/mesa/main/pixel.c b/src/mesa/main/pixel.c
index 57ae9c721a8..d9f3e476e81 100644
--- a/src/mesa/main/pixel.c
+++ b/src/mesa/main/pixel.c
@@ -170,7 +170,7 @@ _mesa_PixelMapfv( GLenum map, GLsizei mapsize, const GLfloat *values )
          return;
       }
       /* restore */
-      ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj;
+      ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj;
       buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
                                               GL_READ_ONLY_ARB,
                                               ctx->Unpack.BufferObj);
@@ -229,7 +229,7 @@ _mesa_PixelMapuiv(GLenum map, GLsizei mapsize, const GLuint *values )
          return;
       }
       /* restore */
-      ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj;
+      ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj;
       buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
                                               GL_READ_ONLY_ARB,
                                               ctx->Unpack.BufferObj);
@@ -303,7 +303,7 @@ _mesa_PixelMapusv(GLenum map, GLsizei mapsize, const GLushort *values )
          return;
       }
       /* restore */
-      ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj;
+      ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj;
       buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
                                               GL_READ_ONLY_ARB,
                                               ctx->Unpack.BufferObj);
@@ -371,7 +371,7 @@ _mesa_GetPixelMapfv( GLenum map, GLfloat *values )
          return;
       }
       /* restore */
-      ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj;
+      ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj;
       buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
                                               GL_WRITE_ONLY_ARB,
                                               ctx->Pack.BufferObj);
@@ -432,7 +432,7 @@ _mesa_GetPixelMapuiv( GLenum map, GLuint *values )
          return;
       }
       /* restore */
-      ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj;
+      ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj;
       buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
                                               GL_WRITE_ONLY_ARB,
                                               ctx->Pack.BufferObj);
@@ -494,7 +494,7 @@ _mesa_GetPixelMapusv( GLenum map, GLushort *values )
          return;
       }
       /* restore */
-      ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj;
+      ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj;
       buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
                                               GL_WRITE_ONLY_ARB,
                                               ctx->Pack.BufferObj);
diff --git a/src/mesa/main/pixelstore.c b/src/mesa/main/pixelstore.c
index ff1a6344cc9..6a641f83f27 100644
--- a/src/mesa/main/pixelstore.c
+++ b/src/mesa/main/pixelstore.c
@@ -245,7 +245,8 @@ _mesa_init_pixelstore( GLcontext *ctx )
    ctx->Pack.ClientStorage = GL_FALSE;
    ctx->Pack.Invert = GL_FALSE;
 #if FEATURE_EXT_pixel_buffer_object
-   ctx->Pack.BufferObj = ctx->Array.NullBufferObj;
+   _mesa_reference_buffer_object(ctx, &ctx->Pack.BufferObj,
+                                 ctx->Shared->NullBufferObj);
 #endif
    ctx->Unpack.Alignment = 4;
    ctx->Unpack.RowLength = 0;
@@ -258,7 +259,8 @@ _mesa_init_pixelstore( GLcontext *ctx )
    ctx->Unpack.ClientStorage = GL_FALSE;
    ctx->Unpack.Invert = GL_FALSE;
 #if FEATURE_EXT_pixel_buffer_object
-   ctx->Unpack.BufferObj = ctx->Array.NullBufferObj;
+   _mesa_reference_buffer_object(ctx, &ctx->Unpack.BufferObj,
+                                 ctx->Shared->NullBufferObj);
 #endif
 
    /*
@@ -278,6 +280,7 @@ _mesa_init_pixelstore( GLcontext *ctx )
    ctx->DefaultPacking.ClientStorage = GL_FALSE;
    ctx->DefaultPacking.Invert = GL_FALSE;
 #if FEATURE_EXT_pixel_buffer_object
-   ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj;
+   _mesa_reference_buffer_object(ctx, &ctx->DefaultPacking.BufferObj,
+                                 ctx->Shared->NullBufferObj);
 #endif
 }
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index ae8dd2a8364..759883743da 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -33,6 +33,7 @@
 #include "mtypes.h"
 #include "hash.h"
 #include "arrayobj.h"
+#include "bufferobj.h"
 #include "shared.h"
 #include "shader/program.h"
 #include "shader/shader_api.h"
@@ -92,6 +93,13 @@ _mesa_alloc_shared_state(GLcontext *ctx)
    shared->BufferObjects = _mesa_NewHashTable();
 #endif
 
+   /* Allocate the default buffer object and set refcount so high that
+    * it never gets deleted.
+    * XXX with recent/improved refcounting this may no longer be needed.
+    */
+   shared->NullBufferObj = _mesa_new_buffer_object(ctx, 0, 0);
+   shared->NullBufferObj->RefCount = 1000;
+
    shared->ArrayObjects = _mesa_NewHashTable();
 
    /* Create default texture objects */
@@ -341,6 +349,10 @@ _mesa_free_shared_state(GLcontext *ctx, struct gl_shared_state *shared)
    _mesa_DeleteHashTable(shared->RenderBuffers);
 #endif
 
+#if FEATURE_ARB_vertex_buffer_object
+   _mesa_delete_buffer_object(ctx, shared->NullBufferObj);
+#endif
+
    /*
     * Free texture objects (after FBOs since some textures might have
     * been bound to FBOs).
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index f18fc8f6837..a6411f7b621 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -64,110 +64,145 @@ update_separate_specular(GLcontext *ctx)
 
 
 /**
- * Update state dependent on vertex arrays.
+ * Compute the index of the last array element that can be safely accessed
+ * in a vertex array.  We can really only do this when the array lives in
+ * a VBO.
+ * The array->_MaxElement field will be updated.
+ * Later in glDrawArrays/Elements/etc we can do some bounds checking.
+ */
+static void
+compute_max_element(struct gl_client_array *array)
+{
+   assert(array->Enabled);
+   array->_MaxElement = 2 * 1000 * 1000 * 1000; /* user array: size unknown */
+   if (array->BufferObj->Name) {
+      /* VBO: Ptr is used as a byte offset into the buffer.  An offset at or
+       * past the end leaves no legal element (and would wrap the math). */
+      const GLsizeiptrARB offset = (GLsizeiptrARB) array->Ptr;
+      const GLsizeiptrARB size = (GLsizeiptrARB) array->BufferObj->Size;
+      array->_MaxElement = 0;
+      if (offset < size) {
+         array->_MaxElement = (size - offset + array->StrideB
+                               - array->_ElementSize) / array->StrideB;
+      }
+   }
+}
+
+
+/**
+ * Helper for update_arrays(): recompute one array's _MaxElement and fold
+ * it into the running minimum.  \return the smaller of min and _MaxElement.
+ */
+static GLuint
+update_min(GLuint min, struct gl_client_array *array)
+{
+   compute_max_element(array);   /* refreshes array->_MaxElement */
+   return (min < array->_MaxElement) ? min : array->_MaxElement;
+}
+
+
+/**
+ * Update ctx->Array._MaxElement (the max legal index into all enabled arrays).
+ * Need to do this upon new array state or new buffer object state.
  */
 static void
 update_arrays( GLcontext *ctx )
 {
-   GLuint i, min;
+   struct gl_array_object *arrayObj = ctx->Array.ArrayObj;
+   GLuint i, min = ~0;
 
    /* find min of _MaxElement values for all enabled arrays */
 
    /* 0 */
    if (ctx->VertexProgram._Current
-       && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_POS].Enabled) {
-      min = ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_POS]._MaxElement;
+       && arrayObj->VertexAttrib[VERT_ATTRIB_POS].Enabled) {
+      min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_POS]);
    }
-   else if (ctx->Array.ArrayObj->Vertex.Enabled) {
-      min = ctx->Array.ArrayObj->Vertex._MaxElement;
-   }
-   else {
-      /* can't draw anything without vertex positions! */
-      min = 0;
+   else if (arrayObj->Vertex.Enabled) {
+      min = update_min(min, &arrayObj->Vertex);
    }
 
    /* 1 */
    if (ctx->VertexProgram._Enabled
-       && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT].Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT]._MaxElement);
+       && arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT].Enabled) {
+      min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT]);
    }
    /* no conventional vertex weight array */
 
    /* 2 */
    if (ctx->VertexProgram._Enabled
-       && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_NORMAL]._MaxElement);
+       && arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) {
+      min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL]);
    }
-   else if (ctx->Array.ArrayObj->Normal.Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->Normal._MaxElement);
+   else if (arrayObj->Normal.Enabled) {
+      min = update_min(min, &arrayObj->Normal);
    }
 
    /* 3 */
    if (ctx->VertexProgram._Enabled
-       && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR0]._MaxElement);
+       && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) {
+      min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0]);
    }
-   else if (ctx->Array.ArrayObj->Color.Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->Color._MaxElement);
+   else if (arrayObj->Color.Enabled) {
+      min = update_min(min, &arrayObj->Color);
    }
 
    /* 4 */
    if (ctx->VertexProgram._Enabled
-       && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR1]._MaxElement);
+       && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) {
+      min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1]);
    }
-   else if (ctx->Array.ArrayObj->SecondaryColor.Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->SecondaryColor._MaxElement);
+   else if (arrayObj->SecondaryColor.Enabled) {
+      min = update_min(min, &arrayObj->SecondaryColor);
    }
 
    /* 5 */
    if (ctx->VertexProgram._Enabled
-       && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_FOG].Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_FOG]._MaxElement);
+       && arrayObj->VertexAttrib[VERT_ATTRIB_FOG].Enabled) {
+      min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_FOG]);
    }
-   else if (ctx->Array.ArrayObj->FogCoord.Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->FogCoord._MaxElement);
+   else if (arrayObj->FogCoord.Enabled) {
+      min = update_min(min, &arrayObj->FogCoord);
    }
 
    /* 6 */
    if (ctx->VertexProgram._Enabled
-       && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]._MaxElement);
+       && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) {
+      min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]);
    }
-   else if (ctx->Array.ArrayObj->Index.Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->Index._MaxElement);
+   else if (arrayObj->Index.Enabled) {
+      min = update_min(min, &arrayObj->Index);
    }
 
-
    /* 7 */
    if (ctx->VertexProgram._Enabled
-       && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG]._MaxElement);
+       && arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) {
+      min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG]);
    }
 
    /* 8..15 */
    for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) {
       if (ctx->VertexProgram._Enabled
-          && ctx->Array.ArrayObj->VertexAttrib[i].Enabled) {
-         min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[i]._MaxElement);
+          && arrayObj->VertexAttrib[i].Enabled) {
+         min = update_min(min, &arrayObj->VertexAttrib[i]);
       }
       else if (i - VERT_ATTRIB_TEX0 < ctx->Const.MaxTextureCoordUnits
-               && ctx->Array.ArrayObj->TexCoord[i - VERT_ATTRIB_TEX0].Enabled) {
-         min = MIN2(min, ctx->Array.ArrayObj->TexCoord[i - VERT_ATTRIB_TEX0]._MaxElement);
+               && arrayObj->TexCoord[i - VERT_ATTRIB_TEX0].Enabled) {
+         min = update_min(min, &arrayObj->TexCoord[i - VERT_ATTRIB_TEX0]);
       }
    }
 
    /* 16..31 */
    if (ctx->VertexProgram._Current) {
       for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
-         if (ctx->Array.ArrayObj->VertexAttrib[i].Enabled) {
-            min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[i]._MaxElement);
+         if (arrayObj->VertexAttrib[i].Enabled) {
+            min = update_min(min, &arrayObj->VertexAttrib[i]);
          }
       }
    }
 
-   if (ctx->Array.ArrayObj->EdgeFlag.Enabled) {
-      min = MIN2(min, ctx->Array.ArrayObj->EdgeFlag._MaxElement);
+   if (arrayObj->EdgeFlag.Enabled) {
+      min = update_min(min, &arrayObj->EdgeFlag);
    }
 
    /* _MaxElement is one past the last legal array element */
@@ -547,7 +582,7 @@ _mesa_update_state_locked( GLcontext *ctx )
    if (new_state & _DD_NEW_SEPARATE_SPECULAR)
       update_separate_specular( ctx );
 
-   if (new_state & (_NEW_ARRAY | _NEW_PROGRAM))
+   if (new_state & (_NEW_ARRAY | _NEW_PROGRAM | _NEW_BUFFER_OBJECT))
       update_arrays( ctx );
 
    if (new_state & (_NEW_BUFFERS | _NEW_VIEWPORT))
diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index b63f747fe8d..0024efc0e64 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -662,6 +662,59 @@ _mesa_test_texobj_completeness( const GLcontext *ctx,
    }
 }
 
+
+/**
+ * Return pointer to a default/fallback texture; it is created on the
+ * first call and cached in ctx->Shared->FallbackTex.
+ * The texture is a 2D 8x8 RGBA texture with all texels = (0,0,0,1), the
+ * value a sampler should get when sampling from an incomplete texture.
+ */
+struct gl_texture_object *
+_mesa_get_fallback_texture(GLcontext *ctx)
+{
+   if (!ctx->Shared->FallbackTex) {
+      /* first call for this shared state: create the fallback texture now */
+      static GLubyte texels[8 * 8][4];
+      struct gl_texture_object *texObj;
+      struct gl_texture_image *texImage;
+      GLuint i;
+
+      for (i = 0; i < 8 * 8; i++) {   /* fill with (0,0,0,255) texels */
+         texels[i][0] =
+         texels[i][1] =
+         texels[i][2] = 0x0;
+         texels[i][3] = 0xff;
+      }
+
+      /* create texture object; NOTE(review): result is not NULL-checked */
+      texObj = ctx->Driver.NewTextureObject(ctx, 0, GL_TEXTURE_2D);
+      assert(texObj->RefCount == 1);
+      texObj->MinFilter = GL_NEAREST;   /* only level 0 gets an image below */
+      texObj->MagFilter = GL_NEAREST;
+
+      /* create level[0] texture image; NOTE(review): also not NULL-checked */
+      texImage = _mesa_get_tex_image(ctx, texObj, GL_TEXTURE_2D, 0);
+
+      /* init the image fields: 8x8x1, border 0, GL_RGBA internal format */
+      _mesa_init_teximage_fields(ctx, GL_TEXTURE_2D, texImage,
+                                    8, 8, 1, 0, GL_RGBA); 
+
+      /* set image data through the driver hook, using default packing */
+      ctx->Driver.TexImage2D(ctx, GL_TEXTURE_2D, 0, GL_RGBA,
+                             8, 8, 0,
+                             GL_RGBA, GL_UNSIGNED_BYTE, texels,
+                             &ctx->DefaultPacking, texObj, texImage);
+
+      _mesa_test_texobj_completeness(ctx, texObj);
+      assert(texObj->_Complete);
+
+      ctx->Shared->FallbackTex = texObj;   /* cache for subsequent calls */
+   }
+   return ctx->Shared->FallbackTex;
+}
+
+
+
 /*@}*/
 
 
diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h
index d5374c5d6c4..2599c0816a9 100644
--- a/src/mesa/main/texobj.h
+++ b/src/mesa/main/texobj.h
@@ -65,6 +65,9 @@ extern void
 _mesa_test_texobj_completeness( const GLcontext *ctx,
                                 struct gl_texture_object *obj );
 
+extern struct gl_texture_object *
+_mesa_get_fallback_texture(GLcontext *ctx);
+
 extern void
 _mesa_unlock_context_textures( GLcontext *ctx );
 
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index e60ab6aa123..2195a334d3e 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -469,8 +469,10 @@ set_tex_parameterf(GLcontext *ctx,
          return GL_TRUE;
       }
       else {
-         _mesa_error(ctx, GL_INVALID_ENUM,
-                     "glTexParameter(pname=GL_TEXTURE_MAX_ANISOTROPY_EXT)");
+         static GLuint count = 0;
+         if (count++ < 10)
+            _mesa_error(ctx, GL_INVALID_ENUM,
+                        "glTexParameter(pname=GL_TEXTURE_MAX_ANISOTROPY_EXT)");
       }
       return GL_FALSE;
 
diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index cef58d7a496..6e0c0c688a1 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -561,8 +561,19 @@ update_texture_state( GLcontext *ctx )
       }
 
       if (!texUnit->_ReallyEnabled) {
-         _mesa_reference_texobj(&texUnit->_Current, NULL);
-         continue;
+         if (fprog) {
+            /* If we get here it means the shader is expecting a texture
+             * object, but there isn't one (or it's incomplete).  Use the
+             * fallback texture.
+             */
+            struct gl_texture_object *texObj = _mesa_get_fallback_texture(ctx);
+            texUnit->_ReallyEnabled = 1 << TEXTURE_2D_INDEX;
+            _mesa_reference_texobj(&texUnit->_Current, texObj);
+         }
+         else {
+            /* fixed-function: texture unit is really disabled */
+            continue;
+         }
       }
 
       /* if we get here, we know this texture unit is enabled */
diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index 72b3e834b3a..a9c9162be1b 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -63,22 +63,11 @@ update_array(GLcontext *ctx, struct gl_client_array *array,
    array->StrideB = stride ? stride : elementSize;
    array->Normalized = normalized;
    array->Ptr = (const GLubyte *) ptr;
-#if FEATURE_ARB_vertex_buffer_object
+   array->_ElementSize = elementSize;
+
    _mesa_reference_buffer_object(ctx, &array->BufferObj,
                                  ctx->Array.ArrayBufferObj);
 
-   /* Compute the index of the last array element that's inside the buffer.
-    * Later in glDrawArrays we'll check if start + count > _MaxElement to
-    * be sure we won't go out of bounds.
-    */
-   if (ctx->Array.ArrayBufferObj->Name)
-      array->_MaxElement = ((GLsizeiptrARB) ctx->Array.ArrayBufferObj->Size
-                            - (GLsizeiptrARB) array->Ptr + array->StrideB
-                            - elementSize) / array->StrideB;
-   else
-#endif
-      array->_MaxElement = 2 * 1000 * 1000 * 1000; /* just a big number */
-
    ctx->NewState |= _NEW_ARRAY;
    ctx->Array.NewState |= dirtyBit;
 }
diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h
index 072037bbd7f..d4d3dd1a94e 100644
--- a/src/mesa/main/version.h
+++ b/src/mesa/main/version.h
@@ -1,8 +1,9 @@
 /*
  * Mesa 3-D graphics library
- * Version:  7.5
+ * Version:  7.6
  *
  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -29,9 +30,9 @@
 
 /* Mesa version */
 #define MESA_MAJOR 7
-#define MESA_MINOR 5
+#define MESA_MINOR 6
 #define MESA_PATCH 0
-#define MESA_VERSION_STRING "7.5-rc1"
+#define MESA_VERSION_STRING "7.6-devel"
 
 /* To make version comparison easy */
 #define MESA_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
diff --git a/src/mesa/math/m_vector.c b/src/mesa/math/m_vector.c
index c5e2fd1de12..4cbab11a358 100644
--- a/src/mesa/math/m_vector.c
+++ b/src/mesa/math/m_vector.c
@@ -1,4 +1,3 @@
-
 /*
  * Mesa 3-D graphics library
  * Version:  3.5
@@ -37,11 +36,12 @@
 
 
 
-/*
+/**
  * Given a vector [count][4] of floats, set all the [][elt] values
  * to 0 (if elt = 0, 1, 2) or 1.0 (if elt = 3).
  */
-void _mesa_vector4f_clean_elem( GLvector4f *vec, GLuint count, GLuint elt )
+void
+_mesa_vector4f_clean_elem( GLvector4f *vec, GLuint count, GLuint elt )
 {
    static const GLubyte elem_bits[4] = {
       VEC_DIRTY_0,
@@ -54,12 +54,13 @@ void _mesa_vector4f_clean_elem( GLvector4f *vec, GLuint count, GLuint elt )
    GLfloat (*data)[4] = (GLfloat (*)[4])vec->start;
    GLuint i;
 
-   for (i = 0 ; i < count ; i++)
+   for (i = 0; i < count; i++)
       data[i][elt] = v;
 
    vec->flags &= ~elem_bits[elt];
 }
 
+
 static const GLubyte size_bits[5] = {
    0,
    VEC_SIZE_1,
@@ -69,61 +70,53 @@ static const GLubyte size_bits[5] = {
 };
 
 
-
-/*
+/**
  * Initialize GLvector objects.
- * Input: v - the vector object to initialize.
- *        flags - bitwise-OR of VEC_* flags
- *        storage - pointer to storage for the vector's data
+ * \param v  the vector object to initialize.
+ * \param flags  bitwise-OR of VEC_* flags
+ * \param storage  pointer to storage for the vector's data
  */
-
-
-void _mesa_vector4f_init( GLvector4f *v, GLuint flags, GLfloat (*storage)[4] )
+void
+_mesa_vector4f_init( GLvector4f *v, GLbitfield flags, GLfloat (*storage)[4] )
 {
    v->stride = 4 * sizeof(GLfloat);
    v->size = 2;   /* may change: 2-4 for vertices and 1-4 for texcoords */
    v->data = storage;
    v->start = (GLfloat *) storage;
    v->count = 0;
-   v->flags = size_bits[4] | flags ;
+   v->flags = size_bits[4] | flags;
 }
 
 
-
-
-/*
+/**
  * Initialize GLvector objects and allocate storage.
- * Input: v - the vector object
- *        sz - unused????
- *        flags - bitwise-OR of VEC_* flags
- *        count - number of elements to allocate in vector
- *        alignment - desired memory alignment for the data (in bytes)
+ * \param v  the vector object
+ * \param flags  bitwise-OR of VEC_* flags
+ * \param count  number of elements to allocate in vector
+ * \param alignment  desired memory alignment for the data (in bytes)
  */
-
-
-void _mesa_vector4f_alloc( GLvector4f *v, GLuint flags, GLuint count,
-			GLuint alignment )
+void
+_mesa_vector4f_alloc( GLvector4f *v, GLbitfield flags, GLuint count,
+                      GLuint alignment )
 {
    v->stride = 4 * sizeof(GLfloat);
    v->size = 2;
    v->storage = ALIGN_MALLOC( count * 4 * sizeof(GLfloat), alignment );
+   v->storage_count = count;
    v->start = (GLfloat *) v->storage;
    v->data = (GLfloat (*)[4]) v->storage;
    v->count = 0;
-   v->flags = size_bits[4] | flags | VEC_MALLOC ;
+   v->flags = size_bits[4] | flags | VEC_MALLOC;
 }
 
 
-
-
-/*
+/**
  * Vector deallocation.  Free whatever memory is pointed to by the
  * vector's storage field if the VEC_MALLOC flag is set.
  * DO NOT free the GLvector object itself, though.
  */
-
-
-void _mesa_vector4f_free( GLvector4f *v )
+void
+_mesa_vector4f_free( GLvector4f *v )
 {
    if (v->flags & VEC_MALLOC) {
       ALIGN_FREE( v->storage );
@@ -135,13 +128,15 @@ void _mesa_vector4f_free( GLvector4f *v )
 }
 
 
-/*
+/**
  * For debugging
  */
-void _mesa_vector4f_print( GLvector4f *v, GLubyte *cullmask, GLboolean culling )
+void
+_mesa_vector4f_print( const GLvector4f *v, const GLubyte *cullmask,
+                      GLboolean culling )
 {
-   GLfloat c[4] = { 0, 0, 0, 1 };
-   const char *templates[5] = {
+   static const GLfloat c[4] = { 0, 0, 0, 1 };
+   static const char *templates[5] = {
       "%d:\t0, 0, 0, 1\n",
       "%d:\t%f, 0, 0, 1\n",
       "%d:\t%f, %f, 0, 1\n",
@@ -154,30 +149,32 @@ void _mesa_vector4f_print( GLvector4f *v, GLubyte *cullmask, GLboolean culling )
    GLuint j, i = 0, count;
 
    _mesa_printf("data-start\n");
-   for ( ; d != v->start ; STRIDE_F(d, v->stride), i++)
+   for (; d != v->start; STRIDE_F(d, v->stride), i++)
       _mesa_printf(t, i, d[0], d[1], d[2], d[3]);
 
    _mesa_printf("start-count(%u)\n", v->count);
    count = i + v->count;
 
    if (culling) {
-      for ( ; i < count ; STRIDE_F(d, v->stride), i++)
+      for (; i < count; STRIDE_F(d, v->stride), i++)
 	 if (cullmask[i])
 	    _mesa_printf(t, i, d[0], d[1], d[2], d[3]);
    }
    else {
-      for ( ; i < count ; STRIDE_F(d, v->stride), i++)
+      for (; i < count; STRIDE_F(d, v->stride), i++)
 	 _mesa_printf(t, i, d[0], d[1], d[2], d[3]);
    }
 
-   for (j = v->size ; j < 4; j++) {
+   for (j = v->size; j < 4; j++) {
       if ((v->flags & (1<<j)) == 0) {
 
 	 _mesa_printf("checking col %u is clean as advertised ", j);
 
-	 for (i = 0, d = (GLfloat *) v->data ;
-	      i < count && d[j] == c[j] ;
-	      i++, STRIDE_F(d, v->stride)) {};
+	 for (i = 0, d = (GLfloat *) v->data;
+	      i < count && d[j] == c[j];
+	      i++, STRIDE_F(d, v->stride)) {
+            /* no-op */
+         }
 
 	 if (i == count)
 	    _mesa_printf(" --> ok\n");
@@ -186,5 +183,3 @@ void _mesa_vector4f_print( GLvector4f *v, GLubyte *cullmask, GLboolean culling )
       }
    }
 }
-
-
diff --git a/src/mesa/math/m_vector.h b/src/mesa/math/m_vector.h
index 647388ac7d0..71281d57589 100644
--- a/src/mesa/math/m_vector.h
+++ b/src/mesa/math/m_vector.h
@@ -31,7 +31,6 @@
 #define _M_VECTOR_H_
 
 #include "main/glheader.h"
-#include "main/mtypes.h"		/* hack for GLchan */
 
 
 #define VEC_DIRTY_0        0x1
@@ -50,7 +49,8 @@
 
 
 
-/* Wrap all the information about vectors up in a struct.  Has
+/**
+ * Wrap all the information about vectors up in a struct.  Has
  * additional fields compared to the other vectors to help us track of
  * different vertex sizes, and whether we need to clean columns out
  * because they contain non-(0,0,0,1) values.
@@ -60,29 +60,27 @@
  * the transformation routines.
  */
 typedef struct {
-   GLfloat (*data)[4];	/* may be malloc'd or point to client data */
-   GLfloat *start;	/* points somewhere inside of <data> */
-   GLuint count;	/* size of the vector (in elements) */
-   GLuint stride;	/* stride from one element to the next (in bytes) */
-   GLuint size;		/* 2-4 for vertices and 1-4 for texcoords */
-   GLuint flags;	/* which columns are dirty */
-   void *storage;	/* self-allocated storage */
+   GLfloat (*data)[4];	/**< may be malloc'd or point to client data */
+   GLfloat *start;	/**< points somewhere inside of <data> */
+   GLuint count;	/**< size of the vector (in elements) */
+   GLuint stride;	/**< stride from one element to the next (in bytes) */
+   GLuint size;		/**< 2-4 for vertices and 1-4 for texcoords */
+   GLbitfield flags;	/**< bitmask of VEC_x flags */
+   void *storage;	/**< self-allocated storage */
+   GLuint storage_count; /**< storage size in elements */
 } GLvector4f;
 
 
-extern void _mesa_vector4f_init( GLvector4f *v, GLuint flags,
+extern void _mesa_vector4f_init( GLvector4f *v, GLbitfield flags,
 			      GLfloat (*storage)[4] );
-extern void _mesa_vector4f_alloc( GLvector4f *v, GLuint flags,
+extern void _mesa_vector4f_alloc( GLvector4f *v, GLbitfield flags,
 			       GLuint count, GLuint alignment );
 extern void _mesa_vector4f_free( GLvector4f *v );
-extern void _mesa_vector4f_print( GLvector4f *v, GLubyte *, GLboolean );
+extern void _mesa_vector4f_print( const GLvector4f *v, const GLubyte *, GLboolean );
 extern void _mesa_vector4f_clean_elem( GLvector4f *vec, GLuint nr, GLuint elt );
 
 
-
-
-
-/*
+/**
  * Given vector <v>, return a pointer (cast to <type *> to the <i>-th element.
  *
  * End up doing a lot of slow imuls if not careful.
diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c
index c7a031067ef..36267c336f6 100644
--- a/src/mesa/shader/arbprogparse.c
+++ b/src/mesa/shader/arbprogparse.c
@@ -563,6 +563,7 @@ struct var_cache
                                  * we take up with our state tokens or constants. Note that
                                  * this is _not_ the same as the number of param registers
                                  * we eventually use */
+   GLuint swizzle;              /**< swizzle to access this variable */
    struct var_cache *next;
 };
 
@@ -581,6 +582,7 @@ var_cache_create (struct var_cache **va)
       (**va).param_binding_begin = ~0;
       (**va).param_binding_length = ~0;
       (**va).alias_binding = NULL;
+      (**va).swizzle = SWIZZLE_XYZW;
       (**va).next = NULL;
    }
 }
@@ -872,15 +874,16 @@ parse_signed_float (const GLubyte ** inst, struct arb_program *Program)
  * This picks out a constant value from the parsed array. The constant vector is r
  * returned in the *values array, which should be of length 4.
  *
- * \param values - The 4 component vector with the constant value in it
+ * \param values - return the vector constant values.
+ * \param size - returns the number of elements in values [1..4]
  */
 static GLvoid
-parse_constant (const GLubyte ** inst, GLfloat *values, struct arb_program *Program,
-                GLboolean use)
+parse_constant(const GLubyte ** inst, GLfloat *values, GLint *size,
+               struct arb_program *Program,
+               GLboolean use)
 {
    GLuint components, i;
 
-
    switch (*(*inst)++) {
       case CONSTANT_SCALAR:
          if (use == GL_TRUE) {
@@ -893,7 +896,7 @@ parse_constant (const GLubyte ** inst, GLfloat *values, struct arb_program *Prog
                values[1] =
                values[2] = values[3] = parse_signed_float (inst, Program);
          }
-
+         *size = 1;
          break;
       case CONSTANT_VECTOR:
          values[0] = values[1] = values[2] = 0;
@@ -902,7 +905,12 @@ parse_constant (const GLubyte ** inst, GLfloat *values, struct arb_program *Prog
          for (i = 0; i < components; i++) {
             values[i] = parse_signed_float (inst, Program);
          }
+         *size = 4;
          break;
+      default:
+         _mesa_problem(NULL, "unexpected case in parse_constant()");
+         values[0] = 0.0F;
+         *size = 0;
    }
 }
 
@@ -1816,7 +1824,6 @@ parse_param_elements (GLcontext * ctx, const GLubyte ** inst,
    GLint idx;
    GLuint err = 0;
    gl_state_index state_tokens[STATE_LENGTH] = {0, 0, 0, 0, 0};
-   GLfloat const_values[4];
 
    GLubyte token = *(*inst)++;
 
@@ -1908,18 +1915,31 @@ parse_param_elements (GLcontext * ctx, const GLubyte ** inst,
 
       case PARAM_CONSTANT:
          /* parsing something like {1.0, 2.0, 3.0, 4.0} */
-         parse_constant (inst, const_values, Program, use);
-         idx = _mesa_add_named_constant(Program->Base.Parameters,
-                                        (char *) param_var->name,
-                                        const_values, 4);
-         if (param_var->param_binding_begin == ~0U)
-            param_var->param_binding_begin = idx;
-         param_var->param_binding_type = PROGRAM_STATE_VAR;
-         /* Note: when we reference this parameter in an instruction later,
-          * we'll check if it's really a constant/immediate and set the
-          * instruction register type appropriately.
-          */
-         param_var->param_binding_length++;
+         {
+            GLfloat const_values[4];
+            GLint size;
+            parse_constant(inst, const_values, &size, Program, use);
+            if (param_var->name[0] == ' ') {
+               /* this is an unnamed constant */
+               idx = _mesa_add_unnamed_constant(Program->Base.Parameters,
+                                                const_values, size,
+                                                &param_var->swizzle);
+            }
+            else {
+               /* named parameter/constant */
+               idx = _mesa_add_named_constant(Program->Base.Parameters,
+                                              (char *) param_var->name,
+                                              const_values, size);
+            }
+            if (param_var->param_binding_begin == ~0U)
+               param_var->param_binding_begin = idx;
+            param_var->param_binding_type = PROGRAM_STATE_VAR;
+            /* Note: when we reference this parameter in an instruction later,
+             * we'll check if it's really a constant/immediate and set the
+             * instruction register type appropriately.
+             */
+            param_var->param_binding_length++;
+         }
          break;
 
       default:
@@ -2428,6 +2448,9 @@ parse_swizzle_mask(const GLubyte ** inst, GLubyte *swizzle, GLint len)
             return;
       }
    }
+
+   if (len == 1)
+      swizzle[1] = swizzle[2] = swizzle[3] = swizzle[0];
 }
 
 
@@ -2482,7 +2505,7 @@ static GLuint
 parse_src_reg (GLcontext * ctx, const GLubyte ** inst,
                struct var_cache **vc_head,
                struct arb_program *Program,
-               gl_register_file * File, GLint * Index,
+               gl_register_file * File, GLint * Index, GLuint *swizzle,
                GLboolean *IsRelOffset )
 {
    struct var_cache *src;
@@ -2491,6 +2514,8 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst,
 
    *IsRelOffset = 0;
 
+   *swizzle = SWIZZLE_XYZW; /* default */
+
    /* And the binding for the src */
    switch (*(*inst)++) {
       case REGISTER_ATTRIB:
@@ -2546,6 +2571,7 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst,
                      }
 
                      *Index = src->param_binding_begin + offset;
+                     *swizzle = src->swizzle;
                      break;
 
                   case ARRAY_INDEX_RELATIVE:
@@ -2568,6 +2594,7 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst,
                         /* And store it properly */
                         *Index = src->param_binding_begin + rel_off;
                         *IsRelOffset = 1;
+                        *swizzle = src->swizzle;
                      }
                      break;
                }
@@ -2579,6 +2606,7 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst,
 
                *File = (gl_register_file) src->param_binding_type;
                *Index = src->param_binding_begin;
+               *swizzle = src->swizzle;
                break;
          }
          break;
@@ -2647,6 +2675,21 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst,
 }
 
 
+static GLuint
+swizzle_swizzle(GLuint baseSwizzle, const GLubyte swizzle[4])
+{
+   GLuint i, swz, s[4];
+   for (i = 0; i < 4; i++) {
+      GLuint c = swizzle[i];
+      if (c <= SWIZZLE_W)
+         s[i] = GET_SWZ(baseSwizzle, c);
+      else
+         s[i] = c;
+   }
+   swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]);
+   return swz;
+}
+
 /**
  * Parse vertex/fragment program vector source register.
  */
@@ -2661,12 +2704,14 @@ parse_vector_src_reg(GLcontext *ctx, const GLubyte **inst,
    GLubyte negateMask;
    GLubyte swizzle[4];
    GLboolean isRelOffset;
+   GLuint baseSwizzle;
 
    /* Grab the sign */
    negateMask = (parse_sign (inst) == -1) ? NEGATE_XYZW : NEGATE_NONE;
 
    /* And the src reg */
-   if (parse_src_reg(ctx, inst, vc_head, program, &file, &index, &isRelOffset))
+   if (parse_src_reg(ctx, inst, vc_head, program, &file, &index, &baseSwizzle,
+                     &isRelOffset))
       return 1;
 
    /* finally, the swizzle */
@@ -2674,7 +2719,7 @@ parse_vector_src_reg(GLcontext *ctx, const GLubyte **inst,
 
    reg->File = file;
    reg->Index = index;
-   reg->Swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+   reg->Swizzle = swizzle_swizzle(baseSwizzle, swizzle);
    reg->Negate = negateMask;
    reg->RelAddr = isRelOffset;
    return 0;
@@ -2695,12 +2740,14 @@ parse_scalar_src_reg(GLcontext *ctx, const GLubyte **inst,
    GLubyte negateMask;
    GLubyte swizzle[4];
    GLboolean isRelOffset;
+   GLuint baseSwizzle;
 
    /* Grab the sign */
    negateMask = (parse_sign (inst) == -1) ? NEGATE_XYZW : NEGATE_NONE;
 
    /* And the src reg */
-   if (parse_src_reg(ctx, inst, vc_head, program, &file, &index, &isRelOffset))
+   if (parse_src_reg(ctx, inst, vc_head, program, &file, &index, &baseSwizzle,
+                     &isRelOffset))
       return 1;
 
    /* finally, the swizzle */
@@ -2708,7 +2755,7 @@ parse_scalar_src_reg(GLcontext *ctx, const GLubyte **inst,
 
    reg->File = file;
    reg->Index = index;
-   reg->Swizzle = (swizzle[0] << 0);
+   reg->Swizzle = swizzle_swizzle(baseSwizzle, swizzle);
    reg->Negate = negateMask;
    reg->RelAddr = isRelOffset;
    return 0;
@@ -3019,8 +3066,10 @@ parse_fp_instruction (GLcontext * ctx, const GLubyte ** inst,
 	    GLubyte negateMask;
             gl_register_file file;
 	    GLint index;
+            GLuint baseSwizzle;
 
-	    if (parse_src_reg(ctx, inst, vc_head, Program, &file, &index, &rel))
+	    if (parse_src_reg(ctx, inst, vc_head, Program, &file, &index,
+                              &baseSwizzle, &rel))
 	       return 1;
 	    parse_extended_swizzle_mask(inst, swizzle, &negateMask);
 	    fp->SrcReg[0].File = file;
@@ -3360,11 +3409,13 @@ parse_vp_instruction (GLcontext * ctx, const GLubyte ** inst,
 	    GLboolean relAddr;
             gl_register_file file;
 	    GLint index;
+            GLuint baseSwizzle;
 
 	    if (parse_dst_reg(ctx, inst, vc_head, Program, &vp->DstReg))
 	       return 1;
 
-	    if (parse_src_reg(ctx, inst, vc_head, Program, &file, &index, &relAddr))
+	    if (parse_src_reg(ctx, inst, vc_head, Program, &file, &index,
+                              &baseSwizzle, &relAddr))
 	       return 1;
 	    parse_extended_swizzle_mask (inst, swizzle, &negateMask);
 	    vp->SrcReg[0].File = file;
diff --git a/src/mesa/shader/arbprogram.c b/src/mesa/shader/arbprogram.c
index 981565ab8f1..317d623a228 100644
--- a/src/mesa/shader/arbprogram.c
+++ b/src/mesa/shader/arbprogram.c
@@ -74,8 +74,6 @@ _mesa_BindProgram(GLenum target, GLuint id)
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
-
    /* Error-check target and get curProg */
    if ((target == GL_VERTEX_PROGRAM_ARB) && /* == GL_VERTEX_PROGRAM_NV */
         (ctx->Extensions.NV_vertex_program ||
@@ -132,6 +130,9 @@ _mesa_BindProgram(GLenum target, GLuint id)
       return;
    }
 
+   /* signal new program (and its new constants) */
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+
    /* bind newProg */
    if (target == GL_VERTEX_PROGRAM_ARB) { /* == GL_VERTEX_PROGRAM_NV */
       _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
@@ -489,7 +490,7 @@ _mesa_ProgramEnvParameter4fARB(GLenum target, GLuint index,
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
 
    if (target == GL_FRAGMENT_PROGRAM_ARB
        && ctx->Extensions.ARB_fragment_program) {
@@ -537,7 +538,7 @@ _mesa_ProgramEnvParameters4fvEXT(GLenum target, GLuint index, GLsizei count,
    GLfloat * dest;
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
 
    if (count <= 0) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameters4fv(count)");
@@ -631,7 +632,7 @@ _mesa_ProgramLocalParameter4fARB(GLenum target, GLuint index,
    struct gl_program *prog;
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
 
    if ((target == GL_FRAGMENT_PROGRAM_NV
         && ctx->Extensions.NV_fragment_program) ||
@@ -685,7 +686,7 @@ _mesa_ProgramLocalParameters4fvEXT(GLenum target, GLuint index, GLsizei count,
    GLint i;
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
 
    if (count <= 0) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glProgramLocalParameters4fv(count)");
diff --git a/src/mesa/shader/nvprogram.c b/src/mesa/shader/nvprogram.c
index 5142c2a4a59..8ba521182b8 100644
--- a/src/mesa/shader/nvprogram.c
+++ b/src/mesa/shader/nvprogram.c
@@ -706,7 +706,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
 
    prog = _mesa_lookup_program(ctx, id);
    if (!prog || prog->Target != GL_FRAGMENT_PROGRAM_NV) {
diff --git a/src/mesa/shader/prog_instruction.c b/src/mesa/shader/prog_instruction.c
index ae3a003feed..44c961927a3 100644
--- a/src/mesa/shader/prog_instruction.c
+++ b/src/mesa/shader/prog_instruction.c
@@ -343,7 +343,10 @@ _mesa_opcode_string(gl_inst_opcode opcode)
 {
    if (opcode < MAX_OPCODE)
       return InstInfo[opcode].Name;
-   else
-      return "OP?";
+   else {
+      static char s[20];
+      _mesa_snprintf(s, sizeof(s), "OP%u", opcode);
+      return s;
+   }
 }
 
diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c
index 6ba2e76ff96..be903106a08 100644
--- a/src/mesa/shader/prog_optimize.c
+++ b/src/mesa/shader/prog_optimize.c
@@ -547,15 +547,13 @@ update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic)
 
 
 /**
- * Find the live intervals for each temporary register in the program.
- * For register R, the interval [A,B] indicates that R is referenced
- * from instruction A through instruction B.
- * Special consideration is needed for loops and subroutines.
- * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
+ * Find first/last instruction that references each temporary register.
  */
-static GLboolean
-find_live_intervals(struct gl_program *prog,
-                    struct interval_list *liveIntervals)
+GLboolean
+_mesa_find_temp_intervals(const struct prog_instruction *instructions,
+                          GLuint numInstructions,
+                          GLint intBegin[MAX_PROGRAM_TEMPS],
+                          GLint intEnd[MAX_PROGRAM_TEMPS])
 {
    struct loop_info
    {
@@ -563,26 +561,15 @@ find_live_intervals(struct gl_program *prog,
    };
    struct loop_info loopStack[MAX_LOOP_NESTING];
    GLuint loopStackDepth = 0;
-   GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
    GLuint i;
 
-   /*
-    * Note: we'll return GL_FALSE below if we find relative indexing
-    * into the TEMP register file.  We can't handle that yet.
-    * We also give up on subroutines for now.
-    */
-
-   if (dbg) {
-      _mesa_printf("Optimize: Begin find intervals\n");
-   }
-
    for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
       intBegin[i] = intEnd[i] = -1;
    }
 
    /* Scan instructions looking for temporary registers */
-   for (i = 0; i < prog->NumInstructions; i++) {
-      const struct prog_instruction *inst = prog->Instructions + i;
+   for (i = 0; i < numInstructions; i++) {
+      const struct prog_instruction *inst = instructions + i;
       if (inst->Opcode == OPCODE_BGNLOOP) {
          loopStack[loopStackDepth].Start = i;
          loopStack[loopStackDepth].End = inst->BranchTarget;
@@ -595,7 +582,7 @@ find_live_intervals(struct gl_program *prog,
          return GL_FALSE;
       }
       else {
-         const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
+         const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
          GLuint j;
          for (j = 0; j < numSrc; j++) {
             if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
@@ -624,6 +611,39 @@ find_live_intervals(struct gl_program *prog,
       }
    }
 
+   return GL_TRUE;
+}
+
+
+/**
+ * Find the live intervals for each temporary register in the program.
+ * For register R, the interval [A,B] indicates that R is referenced
+ * from instruction A through instruction B.
+ * Special consideration is needed for loops and subroutines.
+ * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
+ */
+static GLboolean
+find_live_intervals(struct gl_program *prog,
+                    struct interval_list *liveIntervals)
+{
+   GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
+   GLuint i;
+
+   /*
+    * Note: we'll return GL_FALSE below if we find relative indexing
+    * into the TEMP register file.  We can't handle that yet.
+    * We also give up on subroutines for now.
+    */
+
+   if (dbg) {
+      _mesa_printf("Optimize: Begin find intervals\n");
+   }
+
+   /* build intermediate arrays */
+   if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
+                                  intBegin, intEnd))
+      return GL_FALSE;
+
    /* Build live intervals list from intermediate arrays */
    liveIntervals->Num = 0;
    for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
@@ -792,8 +812,6 @@ _mesa_reallocate_registers(struct gl_program *prog)
 }
 
 
-
-
 /**
  * Apply optimizations to the given program to eliminate unnecessary
  * instructions, temp regs, etc.
diff --git a/src/mesa/shader/prog_optimize.h b/src/mesa/shader/prog_optimize.h
index d102cfd9fc1..43894a27237 100644
--- a/src/mesa/shader/prog_optimize.h
+++ b/src/mesa/shader/prog_optimize.h
@@ -25,7 +25,19 @@
 #ifndef PROG_OPT_H
 #define PROG_OPT_H
 
+
+#include "main/config.h"
+
+
 struct gl_program;
+struct prog_instruction;
+
+
+extern GLboolean
+_mesa_find_temp_intervals(const struct prog_instruction *instructions,
+                          GLuint numInstructions,
+                          GLint intBegin[MAX_PROGRAM_TEMPS],
+                          GLint intEnd[MAX_PROGRAM_TEMPS]);
 
 extern void
 _mesa_optimize_program(GLcontext *ctx, struct gl_program *program);
diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c
index 9967f2978de..de7fef1f861 100644
--- a/src/mesa/shader/prog_print.c
+++ b/src/mesa/shader/prog_print.c
@@ -75,7 +75,11 @@ file_string(gl_register_file f, gl_prog_print_mode mode)
    case PROGRAM_UNDEFINED:
       return "UNDEFINED";
    default:
-      return "Unknown program file!";
+      {
+         static char s[20];
+         _mesa_snprintf(s, sizeof(s), "FILE%u", f);
+         return s;
+      }
    }
 }
 
@@ -736,7 +740,10 @@ _mesa_fprint_instruction_opt(FILE *f,
                                 mode, prog);
       }
       else {
-         _mesa_fprintf(f, "Other opcode %d\n", inst->Opcode);
+         fprint_alu_instruction(f, inst,
+                                _mesa_opcode_string(inst->Opcode),
+                                3/*_mesa_num_inst_src_regs(inst->Opcode)*/,
+                                mode, prog);
       }
       break;
    }
@@ -941,6 +948,10 @@ _mesa_write_shader_to_file(const struct gl_shader *shader)
       fprintf(f, "/*\n");
       _mesa_fprint_program_opt(f, shader->Program, PROG_PRINT_DEBUG, GL_TRUE);
       fprintf(f, "*/\n");
+      fprintf(f, "/* Parameters / constants */\n");
+      fprintf(f, "/*\n");
+      _mesa_fprint_parameter_list(f, shader->Program->Parameters);
+      fprintf(f, "*/\n");
    }
 
    fclose(f);
diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c
index 644cd39185c..a8390d30942 100644
--- a/src/mesa/shader/shader_api.c
+++ b/src/mesa/shader/shader_api.c
@@ -1487,7 +1487,7 @@ _mesa_use_program(GLcontext *ctx, GLuint program)
       return;
    }
 
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
 
    if (program) {
       shProg = _mesa_lookup_shader_program_err(ctx, program, "glUseProgram");
@@ -1509,6 +1509,10 @@ _mesa_use_program(GLcontext *ctx, GLuint program)
                          shProg->Shaders[i]->Name,
                          shProg->Shaders[i]->Type);
          }
+         if (shProg->VertexProgram)
+            printf(" vert prog %u\n", shProg->VertexProgram->Base.Id);
+         if (shProg->FragmentProgram)
+            printf(" frag prog %u\n", shProg->FragmentProgram->Base.Id);
       }
    }
    else {
@@ -1789,7 +1793,7 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count,
       return;
    }
 
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
 
    uniform = &shProg->Uniforms->Uniforms[location];
 
@@ -1929,7 +1933,7 @@ _mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows,
       return;
    }
 
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
 
    uniform = &shProg->Uniforms->Uniforms[location];
 
diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c
index 1fdf4db054c..2bc8809661d 100644
--- a/src/mesa/shader/slang/slang_link.c
+++ b/src/mesa/shader/slang/slang_link.c
@@ -97,7 +97,8 @@ bits_agree(GLbitfield flags1, GLbitfield flags2, GLbitfield bit)
  * which inputs are centroid-sampled, invariant, etc.
  */
 static GLboolean
-link_varying_vars(struct gl_shader_program *shProg, struct gl_program *prog)
+link_varying_vars(GLcontext *ctx,
+                  struct gl_shader_program *shProg, struct gl_program *prog)
 {
    GLuint *map, i, firstVarying, newFile;
    GLbitfield *inOutFlags;
@@ -156,8 +157,12 @@ link_varying_vars(struct gl_shader_program *shProg, struct gl_program *prog)
                                var->Flags);
       }
 
+      if (shProg->Varying->NumParameters > ctx->Const.MaxVarying) {
+         link_error(shProg, "Too many varying variables");
+         return GL_FALSE;
+      }
+
       /* Map varying[i] to varying[j].
-       * Plus, set prog->Input/OutputFlags[] as described above.
        * Note: the loop here takes care of arrays or large (sz>4) vars.
        */
       {
@@ -712,6 +717,8 @@ _slang_link(GLcontext *ctx,
       struct gl_vertex_program *linked_vprog =
          vertex_program(_mesa_clone_program(ctx, &vertProg->Base));
       shProg->VertexProgram = linked_vprog; /* refcount OK */
+      /* vertex program ID not significant; just set Id for debugging purposes */
+      shProg->VertexProgram->Base.Id = shProg->Name;
       ASSERT(shProg->VertexProgram->Base.RefCount == 1);
    }
 
@@ -720,16 +727,18 @@ _slang_link(GLcontext *ctx,
       struct gl_fragment_program *linked_fprog = 
          fragment_program(_mesa_clone_program(ctx, &fragProg->Base));
       shProg->FragmentProgram = linked_fprog; /* refcount OK */
+      /* fragment program ID not significant; just set Id for debugging purposes */
+      shProg->FragmentProgram->Base.Id = shProg->Name;
       ASSERT(shProg->FragmentProgram->Base.RefCount == 1);
    }
 
    /* link varying vars */
    if (shProg->VertexProgram) {
-      if (!link_varying_vars(shProg, &shProg->VertexProgram->Base))
+      if (!link_varying_vars(ctx, shProg, &shProg->VertexProgram->Base))
          return;
    }
    if (shProg->FragmentProgram) {
-      if (!link_varying_vars(shProg, &shProg->FragmentProgram->Base))
+      if (!link_varying_vars(ctx, shProg, &shProg->FragmentProgram->Base))
          return;
    }
 
diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c
index 536293683e9..4d897b677e0 100644
--- a/src/mesa/state_tracker/st_atom_framebuffer.c
+++ b/src/mesa/state_tracker/st_atom_framebuffer.c
@@ -122,6 +122,7 @@ update_framebuffer_state( struct st_context *st )
                                    strb->surface);
             framebuffer->nr_cbufs++;
          }
+         strb->defined = GL_TRUE; /* we'll be drawing something */
       }
    }
    for (i = framebuffer->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; i++) {
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index a94e11fff12..f5d802055f5 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -28,6 +28,7 @@
 
 #include "main/imports.h"
 #include "main/mtypes.h"
+#include "main/arrayobj.h"
 #include "main/bufferobj.h"
 
 #include "st_inlines.h"
@@ -307,4 +308,8 @@ st_init_bufferobject_functions(struct dd_function_table *functions)
    functions->MapBufferRange = st_bufferobj_map_range;
    functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range;
    functions->UnmapBuffer = st_bufferobj_unmap;
+
+   /* For GL_APPLE_vertex_array_object */
+   functions->NewArrayObject = _mesa_new_array_object;
+   functions->DeleteArrayObject = _mesa_delete_array_object;
 }
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index e003b6db5cf..c249f3b3578 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -125,6 +125,8 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
    strb->Base.Height = height;
    init_renderbuffer_bits(strb, template.format);
 
+   strb->defined = GL_FALSE;  /* undefined contents now */
+
    /* Probably need dedicated flags for surface usage too: 
     */
    surface_usage = (PIPE_BUFFER_USAGE_GPU_READ |
@@ -462,6 +464,134 @@ st_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
 }
 
 
+/**
+ * Copy back color buffer to front color buffer.
+ */
+static void
+copy_back_to_front(struct st_context *st,
+                   struct gl_framebuffer *fb,
+                   gl_buffer_index frontIndex,
+                   gl_buffer_index backIndex)
+
+{
+   struct st_framebuffer *stfb = (struct st_framebuffer *) fb;
+   struct pipe_surface *surf_front, *surf_back;
+
+   (void) st_get_framebuffer_surface(stfb, frontIndex, &surf_front);
+   (void) st_get_framebuffer_surface(stfb, backIndex, &surf_back);
+
+   if (surf_front && surf_back) {
+      st->pipe->surface_copy(st->pipe,
+                             surf_front, 0, 0,  /* dest */
+                             surf_back, 0, 0,   /* src */
+                             fb->Width, fb->Height);
+   }
+}
+
+
+/**
+ * Check if we're drawing into, or reading from, a front color buffer.  If the
+ * front buffer is missing, create it now.
+ *
+ * The back color buffer must exist since we'll use its format/samples info
+ * for creating the front buffer.
+ *
+ * \param frontIndex  either BUFFER_FRONT_LEFT or BUFFER_FRONT_RIGHT
+ * \param backIndex  either BUFFER_BACK_LEFT or BUFFER_BACK_RIGHT
+ */
+static void
+check_create_front_buffer(GLcontext *ctx, struct gl_framebuffer *fb,
+                          gl_buffer_index frontIndex,
+                          gl_buffer_index backIndex)
+{
+   if (fb->Attachment[frontIndex].Renderbuffer == NULL) {
+      GLboolean create = GL_FALSE;
+
+      /* check if drawing to or reading from front buffer */
+      if (fb->_ColorReadBufferIndex == frontIndex) {
+         create = GL_TRUE;
+      }
+      else {
+         GLuint b;
+         for (b = 0; b < fb->_NumColorDrawBuffers; b++) {
+            if (fb->_ColorDrawBufferIndexes[b] == frontIndex) {
+               create = GL_TRUE;
+               break;
+            }
+         }
+      }
+
+      if (create) {
+         struct st_renderbuffer *back;
+         struct gl_renderbuffer *front;
+         enum pipe_format colorFormat;
+         uint samples;
+
+         if (0)
+            _mesa_debug(ctx, "Allocate new front buffer\n");
+
+         /* get back renderbuffer info */
+         back = st_renderbuffer(fb->Attachment[backIndex].Renderbuffer);
+         colorFormat = back->format;
+         samples = back->Base.NumSamples;
+
+         /* create front renderbuffer */
+         front = st_new_renderbuffer_fb(colorFormat, samples);
+         _mesa_add_renderbuffer(fb, frontIndex, front);
+
+         /* alloc texture/surface for new front buffer */
+         front->AllocStorage(ctx, front, front->InternalFormat,
+                             fb->Width, fb->Height);
+
+         /* initialize the front color buffer contents by copying
+          * the back buffer.
+          */
+         copy_back_to_front(ctx->st, fb, frontIndex, backIndex);
+      }
+   }
+}
+
+
+/**
+ * If front left/right color buffers are missing, create them now.
+ */
+static void
+check_create_front_buffers(GLcontext *ctx, struct gl_framebuffer *fb)
+{
+   /* check if we need to create the front left buffer now */
+   check_create_front_buffer(ctx, fb, BUFFER_FRONT_LEFT, BUFFER_BACK_LEFT);
+
+   if (fb->Visual.stereoMode) {
+      check_create_front_buffer(ctx, fb, BUFFER_FRONT_RIGHT, BUFFER_BACK_RIGHT);
+   }
+
+   st_invalidate_state(ctx, _NEW_BUFFERS);
+}
+
+
+/**
+ * Called via glDrawBuffer.
+ */
+static void
+st_DrawBuffers(GLcontext *ctx, GLsizei count, const GLenum *buffers)
+{
+   (void) count;
+   (void) buffers;
+   check_create_front_buffers(ctx, ctx->DrawBuffer);
+}
+
+
+/**
+ * Called via glReadBuffer.
+ */
+static void
+st_ReadBuffer(GLcontext *ctx, GLenum buffer)
+{
+   (void) buffer;
+   check_create_front_buffers(ctx, ctx->ReadBuffer);
+}
+
+
 void st_init_fbo_functions(struct dd_function_table *functions)
 {
    functions->NewFramebuffer = st_new_framebuffer;
@@ -474,4 +604,7 @@ void st_init_fbo_functions(struct dd_function_table *functions)
    /* no longer needed by core Mesa, drivers handle resizes...
    functions->ResizeBuffers = st_resize_buffers;
    */
+
+   functions->DrawBuffers = st_DrawBuffers;
+   functions->ReadBuffer = st_ReadBuffer;
 }
diff --git a/src/mesa/state_tracker/st_cb_fbo.h b/src/mesa/state_tracker/st_cb_fbo.h
index 44fa9fe9a4f..fd77d0a95b0 100644
--- a/src/mesa/state_tracker/st_cb_fbo.h
+++ b/src/mesa/state_tracker/st_cb_fbo.h
@@ -44,6 +44,7 @@ struct st_renderbuffer
    struct pipe_texture *texture;
    struct pipe_surface *surface; /* temporary view into texture */
    enum pipe_format format;  /** preferred format, or PIPE_FORMAT_NONE */
+   GLboolean defined;        /**< defined contents? */
 
    struct st_texture_object *rtt;  /**< GL render to texture's texture */
    int rtt_level, rtt_face, rtt_slice;
diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c
index fbaffd154f9..8ceeeabcd37 100644
--- a/src/mesa/state_tracker/st_cb_flush.c
+++ b/src/mesa/state_tracker/st_cb_flush.c
@@ -47,10 +47,19 @@
 #include "util/u_blit.h"
 
 
+/** Check if we have a front color buffer and if it's been drawn to. */
 static INLINE GLboolean
 is_front_buffer_dirty(struct st_context *st)
 {
-   return st->frontbuffer_status == FRONT_STATUS_DIRTY;
+   if (st->frontbuffer_status == FRONT_STATUS_DIRTY) {
+      return GL_TRUE;
+   }
+   else {
+      GLframebuffer *fb = st->ctx->DrawBuffer;
+      struct st_renderbuffer *strb
+         = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+      return strb && strb->defined;
+   }
 }
 
 
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 6ffed56d9a0..18adb35e872 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -45,6 +45,7 @@ struct blit_state;
 struct bitmap_cache;
 
 
+/** XXX we'd like to get rid of these */
 #define FRONT_STATUS_UNDEFINED    0
 #define FRONT_STATUS_DIRTY        1
 #define FRONT_STATUS_COPY_OF_BACK 2
@@ -111,7 +112,7 @@ struct st_context
       struct gl_fragment_program *fragment_program;
    } cb;
 
-   GLuint frontbuffer_status;  /**< one of FRONT_STATUS_ */
+   GLuint frontbuffer_status;  /**< one of FRONT_STATUS_ (XXX to be removed) */
 
    char vendor[100];
    char renderer[100];
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 8f6be507742..d526dfcf52e 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -168,6 +168,8 @@ void st_init_extensions(struct st_context *st)
    ctx->Extensions.EXT_texture_env_dot3 = GL_TRUE;
    ctx->Extensions.EXT_texture_lod_bias = GL_TRUE;
 
+   ctx->Extensions.APPLE_vertex_array_object = GL_TRUE;
+
    ctx->Extensions.NV_blend_square = GL_TRUE;
    ctx->Extensions.NV_texgen_reflection = GL_TRUE;
 
diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c
index daaad65ccaf..ef800291ccd 100644
--- a/src/mesa/state_tracker/st_framebuffer.c
+++ b/src/mesa/state_tracker/st_framebuffer.c
@@ -58,19 +58,19 @@ st_create_framebuffer( const __GLcontextModes *visual,
 
       _mesa_initialize_framebuffer(&stfb->Base, visual);
 
-      {
-         /* fake frontbuffer */
-         /* XXX allocation should only happen in the unusual case
-            it's actually needed */
+      if (visual->doubleBufferMode) {
          struct gl_renderbuffer *rb
             = st_new_renderbuffer_fb(colorFormat, samples);
-         _mesa_add_renderbuffer(&stfb->Base, BUFFER_FRONT_LEFT, rb);
+         _mesa_add_renderbuffer(&stfb->Base, BUFFER_BACK_LEFT, rb);
       }
-
-      if (visual->doubleBufferMode) {
+      else {
+         /* Only allocate front buffer right now if we're single buffered.
+          * If double-buffered, allocate front buffer on demand later.
+          * See check_create_front_buffers().
+          */
          struct gl_renderbuffer *rb
             = st_new_renderbuffer_fb(colorFormat, samples);
-         _mesa_add_renderbuffer(&stfb->Base, BUFFER_BACK_LEFT, rb);
+         _mesa_add_renderbuffer(&stfb->Base, BUFFER_FRONT_LEFT, rb);
       }
 
       if (depthFormat == stencilFormat && depthFormat != PIPE_FORMAT_NONE) {
@@ -293,6 +293,123 @@ st_notify_swapbuffers(struct st_framebuffer *stfb)
 }
 
 
+/**
+ * Return the pipe surface backing a renderbuffer, or NULL if the
+ * attachment point is empty (rb == NULL).
+ */
+static struct pipe_surface *
+renderbuffer_surface(struct gl_renderbuffer *rb)
+{
+   struct st_renderbuffer *strb = st_renderbuffer(rb);
+   return strb ? strb->surface : NULL;
+}
+
+
+/**
+ * Exchange one front/back pair of color attachments (left or right).
+ *
+ * The pointers are only exchanged when both attachments exist; the buffer
+ * that becomes the back buffer then has its contents marked undefined.
+ *
+ * \param fb  framebuffer whose attachments are updated
+ * \param front  attachment index of the front buffer (BUFFER_FRONT_x)
+ * \param back  attachment index of the back buffer (BUFFER_BACK_x)
+ * \param front_surf  if non-NULL, returns the surface to display: the
+ *                    buffer that was the back buffer on entry, or NULL
+ *                    if there is none
+ */
+static void
+swap_color_attachments(struct gl_framebuffer *fb,
+                       GLuint front, GLuint back,
+                       struct pipe_surface **front_surf)
+{
+   struct gl_renderbuffer *rbFront = fb->Attachment[front].Renderbuffer;
+   struct gl_renderbuffer *rbBack = fb->Attachment[back].Renderbuffer;
+
+   if (rbFront && rbBack) {
+      fb->Attachment[front].Renderbuffer = rbBack;
+      fb->Attachment[back].Renderbuffer = rbFront;
+      /* mark (new) back buffer contents as undefined */
+      st_renderbuffer(rbFront)->defined = GL_FALSE;
+   }
+
+   /* With a swap, the old back buffer is the new front buffer.  Without
+    * one (no front buffer), the back buffer is displayed directly.
+    */
+   if (front_surf)
+      *front_surf = renderbuffer_surface(rbBack);
+}
+
+
+/**
+ * Swap the front/back color buffers.  Exchange the front/back pointers
+ * and update some derived state.
+ * No need to call st_notify_swapbuffers() first.
+ *
+ * For a single-buffered framebuffer, no swap occurs, but we still return
+ * the pointer(s) to the front color buffer(s).
+ *
+ * May be called with no current context; in that case the flush and the
+ * derived-state update are skipped and only the pointers are exchanged.
+ *
+ * \param stfb  the framebuffer to swap
+ * \param front_left  if non-NULL, returns pointer to front-left surface
+ *                    after swap (NULL if there is none)
+ * \param front_right  if non-NULL, returns pointer to front-right surface
+ *                     after swap (NULL if there is none)
+ */
+void
+st_swapbuffers(struct st_framebuffer *stfb,
+               struct pipe_surface **front_left,
+               struct pipe_surface **front_right)
+{
+   struct gl_framebuffer *fb = &stfb->Base;
+
+   GET_CURRENT_CONTEXT(ctx);
+
+   /* Only flush if this framebuffer is the one being rendered to */
+   if (ctx && ctx->DrawBuffer == fb) {
+      st_flush( ctx->st,
+                PIPE_FLUSH_RENDER_CACHE |
+                PIPE_FLUSH_SWAPBUFFERS |
+                PIPE_FLUSH_FRAME,
+                NULL );
+   }
+
+   if (!fb->Visual.doubleBufferMode) {
+      /* single buffer mode - return pointers to front surfaces */
+      if (front_left) {
+         *front_left = renderbuffer_surface(
+            fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+      }
+      if (front_right) {
+         *front_right = renderbuffer_surface(
+            fb->Attachment[BUFFER_FRONT_RIGHT].Renderbuffer);
+      }
+      return;
+   }
+
+   /* swap left buffers */
+   swap_color_attachments(fb, BUFFER_FRONT_LEFT, BUFFER_BACK_LEFT,
+                          front_left);
+
+   /* swap right buffers (for stereo) */
+   swap_color_attachments(fb, BUFFER_FRONT_RIGHT, BUFFER_BACK_RIGHT,
+                          front_right);
+
+   /* There may be no current context (hence the NULL check before the
+    * flush above); the calls below operate on the context, so skip them.
+    */
+   if (ctx) {
+      /* Update the _ColorDrawBuffers[] array and _ColorReadBuffer pointer */
+      _mesa_update_framebuffer(ctx);
+
+      /* Make sure we draw into the new back surface */
+      st_invalidate_state(ctx, _NEW_BUFFERS);
+   }
+}
+
+
 void *st_framebuffer_private( struct st_framebuffer *stfb )
 {
    return stfb->Private;
diff --git a/src/mesa/state_tracker/st_public.h b/src/mesa/state_tracker/st_public.h
index 290b8a974e6..174fbc63941 100644
--- a/src/mesa/state_tracker/st_public.h
+++ b/src/mesa/state_tracker/st_public.h
@@ -103,6 +103,10 @@ void st_finish( struct st_context *st );
 
 void st_notify_swapbuffers(struct st_framebuffer *stfb);
 
+void st_swapbuffers(struct st_framebuffer *stfb,
+                    struct pipe_surface **front_left,
+                    struct pipe_surface **front_right);
+
 int st_set_teximage(struct pipe_texture *pt, int target);
 
 /** Redirect rendering into stfb's surface to a texture image */
diff --git a/src/mesa/swrast/s_imaging.c b/src/mesa/swrast/s_imaging.c
index d6be3aa022e..3578b713f61 100644
--- a/src/mesa/swrast/s_imaging.c
+++ b/src/mesa/swrast/s_imaging.c
@@ -60,7 +60,7 @@ _swrast_CopyColorTable( GLcontext *ctx,
 
    /* save PBO binding */
    bufferSave = ctx->Unpack.BufferObj;
-   ctx->Unpack.BufferObj = ctx->Array.NullBufferObj;
+   ctx->Unpack.BufferObj = ctx->Shared->NullBufferObj;
 
    _mesa_ColorTable(target, internalformat, width, GL_RGBA, CHAN_TYPE, data);
 
@@ -94,7 +94,7 @@ _swrast_CopyColorSubTable( GLcontext *ctx,GLenum target, GLsizei start,
 
    /* save PBO binding */
    bufferSave = ctx->Unpack.BufferObj;
-   ctx->Unpack.BufferObj = ctx->Array.NullBufferObj;
+   ctx->Unpack.BufferObj = ctx->Shared->NullBufferObj;
 
    _mesa_ColorSubTable(target, start, width, GL_RGBA, CHAN_TYPE, data);
 
@@ -126,7 +126,7 @@ _swrast_CopyConvolutionFilter1D(GLcontext *ctx, GLenum target,
 
    /* save PBO binding */
    bufferSave = ctx->Unpack.BufferObj;
-   ctx->Unpack.BufferObj = ctx->Array.NullBufferObj;
+   ctx->Unpack.BufferObj = ctx->Shared->NullBufferObj;
 
    /* store as convolution filter */
    _mesa_ConvolutionFilter1D(target, internalFormat, width,
@@ -178,12 +178,12 @@ _swrast_CopyConvolutionFilter2D(GLcontext *ctx, GLenum target,
    ctx->Unpack.SkipImages = 0;
    ctx->Unpack.SwapBytes = GL_FALSE;
    ctx->Unpack.LsbFirst = GL_FALSE;
-   ctx->Unpack.BufferObj = ctx->Array.NullBufferObj;
+   ctx->Unpack.BufferObj = ctx->Shared->NullBufferObj;
    ctx->NewState |= _NEW_PACKUNPACK;
 
    /* save PBO binding */
    bufferSave = ctx->Unpack.BufferObj;
-   ctx->Unpack.BufferObj = ctx->Array.NullBufferObj;
+   ctx->Unpack.BufferObj = ctx->Shared->NullBufferObj;
 
    _mesa_ConvolutionFilter2D(target, internalFormat, width, height,
                              GL_RGBA, CHAN_TYPE, rgba);
diff --git a/src/mesa/swrast/s_texfilter.c b/src/mesa/swrast/s_texfilter.c
index 31bfb5c9520..0067d3eeb73 100644
--- a/src/mesa/swrast/s_texfilter.c
+++ b/src/mesa/swrast/s_texfilter.c
@@ -1329,7 +1329,7 @@ static void
 opt_sample_rgb_2d(GLcontext *ctx,
                   const struct gl_texture_object *tObj,
                   GLuint n, const GLfloat texcoords[][4],
-                  const GLfloat lambda[], GLchan rgba[][4])
+                  const GLfloat lambda[], GLfloat rgba[][4])
 {
    const struct gl_texture_image *img = tObj->Image[0][tObj->BaseLevel];
    const GLfloat width = (GLfloat) img->Width;
@@ -1351,9 +1351,9 @@ opt_sample_rgb_2d(GLcontext *ctx,
       GLint j = IFLOOR(texcoords[k][1] * height) & rowMask;
       GLint pos = (j << shift) | i;
       GLchan *texel = ((GLchan *) img->Data) + 3*pos;
-      rgba[k][RCOMP] = texel[0];
-      rgba[k][GCOMP] = texel[1];
-      rgba[k][BCOMP] = texel[2];
+      rgba[k][RCOMP] = CHAN_TO_FLOAT(texel[0]);
+      rgba[k][GCOMP] = CHAN_TO_FLOAT(texel[1]);
+      rgba[k][BCOMP] = CHAN_TO_FLOAT(texel[2]);
    }
 }
 
@@ -1370,7 +1370,7 @@ static void
 opt_sample_rgba_2d(GLcontext *ctx,
                    const struct gl_texture_object *tObj,
                    GLuint n, const GLfloat texcoords[][4],
-                   const GLfloat lambda[], GLchan rgba[][4])
+                   const GLfloat lambda[], GLfloat rgba[][4])
 {
    const struct gl_texture_image *img = tObj->Image[0][tObj->BaseLevel];
    const GLfloat width = (GLfloat) img->Width;
@@ -1392,7 +1392,10 @@ opt_sample_rgba_2d(GLcontext *ctx,
       const GLint row = IFLOOR(texcoords[i][1] * height) & rowMask;
       const GLint pos = (row << shift) | col;
       const GLchan *texel = ((GLchan *) img->Data) + (pos << 2);    /* pos*4 */
-      COPY_4V(rgba[i], texel);
+      rgba[i][RCOMP] = CHAN_TO_FLOAT(texel[0]);
+      rgba[i][GCOMP] = CHAN_TO_FLOAT(texel[1]);
+      rgba[i][BCOMP] = CHAN_TO_FLOAT(texel[2]);
+      rgba[i][ACOMP] = CHAN_TO_FLOAT(texel[3]);
    }
 }
 
@@ -1425,7 +1428,6 @@ sample_lambda_2d(GLcontext *ctx,
       case GL_NEAREST:
          if (repeatNoBorderPOT) {
             switch (tImg->TexFormat->MesaFormat) {
-#if 0
             case MESA_FORMAT_RGB:
                opt_sample_rgb_2d(ctx, tObj, m, texcoords + minStart,
                                  NULL, rgba + minStart);
@@ -1434,7 +1436,6 @@ sample_lambda_2d(GLcontext *ctx,
 	       opt_sample_rgba_2d(ctx, tObj, m, texcoords + minStart,
                                   NULL, rgba + minStart);
                break;
-#endif
             default:
                sample_nearest_2d(ctx, tObj, m, texcoords + minStart,
                                  NULL, rgba + minStart );
@@ -1484,7 +1485,6 @@ sample_lambda_2d(GLcontext *ctx,
       case GL_NEAREST:
          if (repeatNoBorderPOT) {
             switch (tImg->TexFormat->MesaFormat) {
-#if 0
             case MESA_FORMAT_RGB:
                opt_sample_rgb_2d(ctx, tObj, m, texcoords + magStart,
                                  NULL, rgba + magStart);
@@ -1493,7 +1493,6 @@ sample_lambda_2d(GLcontext *ctx,
 	       opt_sample_rgba_2d(ctx, tObj, m, texcoords + magStart,
                                   NULL, rgba + magStart);
                break;
-#endif
             default:
                sample_nearest_2d(ctx, tObj, m, texcoords + magStart,
                                  NULL, rgba + magStart );
@@ -3180,7 +3179,6 @@ _swrast_choose_texture_sample_func( GLcontext *ctx,
          }
          else {
             /* check for a few optimized cases */
-#if 0
             const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
             ASSERT(t->MinFilter == GL_NEAREST);
             if (t->WrapS == GL_REPEAT &&
@@ -3197,10 +3195,6 @@ _swrast_choose_texture_sample_func( GLcontext *ctx,
                      img->TexFormat->MesaFormat == MESA_FORMAT_RGBA) {
                return &opt_sample_rgba_2d;
             }
-#else
-            if (0)
-               ;
-#endif
             else {
                return &sample_nearest_2d;
             }
diff --git a/src/mesa/tnl/t_vb_cliptmp.h b/src/mesa/tnl/t_vb_cliptmp.h
index 788fe329ed8..61b0a89554c 100644
--- a/src/mesa/tnl/t_vb_cliptmp.h
+++ b/src/mesa/tnl/t_vb_cliptmp.h
@@ -127,7 +127,7 @@ TAG(clip_line)( GLcontext *ctx, GLuint v0, GLuint v1, GLubyte mask )
    GLuint p;
    const GLuint v0_orig = v0;
 
-   if (mask & 0x3f) {
+   if (mask & CLIP_FRUSTUM_BITS) {
       LINE_CLIP( CLIP_RIGHT_BIT,  -1,  0,  0, 1 );
       LINE_CLIP( CLIP_LEFT_BIT,    1,  0,  0, 1 );
       LINE_CLIP( CLIP_TOP_BIT,     0, -1,  0, 1 );
@@ -199,7 +199,7 @@ TAG(clip_tri)( GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLubyte mask )
 
    ASSIGN_3V(inlist, v2, v0, v1 ); /* pv rotated to slot zero */
 
-   if (mask & 0x3f) {
+   if (mask & CLIP_FRUSTUM_BITS) {
       POLY_CLIP( CLIP_RIGHT_BIT,  -1,  0,  0, 1 );
       POLY_CLIP( CLIP_LEFT_BIT,    1,  0,  0, 1 );
       POLY_CLIP( CLIP_TOP_BIT,     0, -1,  0, 1 );
@@ -227,6 +227,25 @@ TAG(clip_tri)( GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLubyte mask )
       }
    }
 
+   if (0) {
+      /* print pre/post-clip vertex coords */
+      GLuint i, j;
+      _mesa_printf("pre clip\n");
+      for (i = 0; i < 3; i++) {
+         j = outlist[i];
+         _mesa_printf("  %u: %u: %f, %f, %f, %f\n",
+                      i, j,
+                      coord[j][0], coord[j][1], coord[j][2], coord[j][3]);
+      }
+      _mesa_printf("post clip\n");
+      for (i = 0; i < n; i++) {
+         j = inlist[i];
+         _mesa_printf("  %u: %u: %f, %f, %f, %f\n",
+                      i, j,
+                      coord[j][0], coord[j][1], coord[j][2], coord[j][3]);
+      }
+   }
+
    tnl->Driver.Render.ClippedPolygon( ctx, inlist, n );
 }
 
@@ -250,7 +269,7 @@ TAG(clip_quad)( GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3,
 
    ASSIGN_4V(inlist, v3, v0, v1, v2 ); /* pv rotated to slot zero */
 
-   if (mask & 0x3f) {
+   if (mask & CLIP_FRUSTUM_BITS) {
       POLY_CLIP( CLIP_RIGHT_BIT,  -1,  0,  0, 1 );
       POLY_CLIP( CLIP_LEFT_BIT,    1,  0,  0, 1 );
       POLY_CLIP( CLIP_TOP_BIT,     0, -1,  0, 1 );
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index ca8190fd059..f193a4bf1e0 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -28,6 +28,7 @@
 #include "main/imports.h"
 #include "main/mtypes.h"
 #include "main/api_arrayelt.h"
+#include "main/bufferobj.h"
 #include "math/m_eval.h"
 #include "vbo.h"
 #include "vbo_context.h"
@@ -81,7 +82,8 @@ static void init_legacy_currval(GLcontext *ctx)
       cl->Type = GL_FLOAT;
       cl->Format = GL_RGBA;
       cl->Ptr = (const void *)ctx->Current.Attrib[i];
-      cl->BufferObj = ctx->Array.NullBufferObj;
+      _mesa_reference_buffer_object(ctx, &cl->BufferObj,
+                                    ctx->Shared->NullBufferObj);
    }
 }
 
@@ -106,7 +108,8 @@ static void init_generic_currval(GLcontext *ctx)
       cl->Stride = 0;
       cl->StrideB = 0;
       cl->Enabled = 1;
-      cl->BufferObj = ctx->Array.NullBufferObj;
+      _mesa_reference_buffer_object(ctx, &cl->BufferObj,
+                                    ctx->Shared->NullBufferObj);
    }
 }
 
@@ -150,7 +153,7 @@ static void init_mat_currval(GLcontext *ctx)
       cl->Stride = 0;
       cl->StrideB = 0;
       cl->Enabled = 1;
-      cl->BufferObj = ctx->Array.NullBufferObj;
+      cl->BufferObj = ctx->Shared->NullBufferObj;
    }
 }
 
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 5d35ec9c111..6871ee5cab1 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -671,7 +671,7 @@ void vbo_use_buffer_objects(GLcontext *ctx)
    GLsizei size = VBO_VERT_BUFFER_SIZE;
 
    /* Make sure this func is only used once */
-   assert(exec->vtx.bufferobj == ctx->Array.NullBufferObj);
+   assert(exec->vtx.bufferobj == ctx->Shared->NullBufferObj);
    if (exec->vtx.buffer_map) {
       _mesa_align_free(exec->vtx.buffer_map);
       exec->vtx.buffer_map = NULL;
@@ -697,7 +697,7 @@ void vbo_exec_vtx_init( struct vbo_exec_context *exec )
     */
    _mesa_reference_buffer_object(ctx,
                                  &exec->vtx.bufferobj,
-                                 ctx->Array.NullBufferObj);
+                                 ctx->Shared->NullBufferObj);
 
    ASSERT(!exec->vtx.buffer_map);
    exec->vtx.buffer_map = (GLfloat *)ALIGN_MALLOC(VBO_VERT_BUFFER_SIZE, 64);
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 0d4cbe9a1e5..f4ad394f516 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -352,6 +352,13 @@ vbo_exec_DrawRangeElements(GLenum mode,
    if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count, type, indices ))
       return;
 
+   if (end >= ctx->Array._MaxElement) {
+      /* the max element is out of bounds of one or more enabled arrays */
+      _mesa_warning(ctx, "glDraw[Range]Elements() index=%u is "
+                    "out of bounds (max=%u)", end, ctx->Array._MaxElement);
+      return;
+   }
+
    FLUSH_CURRENT( ctx, 0 );
 
    if (ctx->NewState)
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c
index dae778e741e..ea87dede646 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -161,7 +161,7 @@ void vbo_rebase_prims( GLcontext *ctx,
 				 GL_ELEMENT_ARRAY_BUFFER,
 				 ib->obj);
 
-      tmp_ib.obj = ctx->Array.NullBufferObj;
+      tmp_ib.obj = ctx->Shared->NullBufferObj;
       tmp_ib.ptr = tmp_indices;
       tmp_ib.count = ib->count;
       tmp_ib.type = ib->type;
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index 5fb66d3318f..2f6a1998eaa 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -31,6 +31,7 @@
 
 #include "main/glheader.h"
 #include "main/imports.h"
+#include "main/image.h"
 #include "main/macros.h"
 #include "main/enums.h"
 #include "main/mtypes.h"
@@ -41,7 +42,8 @@
 
 #define ELT_TABLE_SIZE 16
 
-/* Used for vertex-level splitting of indexed buffers.  Note that
+/**
+ * Used for vertex-level splitting of indexed buffers.  Note that
  * non-indexed primitives may be converted to indexed in some cases
  * (eg loops, fans) in order to use this splitting path.
  */
@@ -73,23 +75,21 @@ struct copy_context {
    GLuint *translated_elt_buf;
    const GLuint *srcelt;
 
-   /* A baby hash table to avoid re-emitting (some) duplicate
+   /** A baby hash table to avoid re-emitting (some) duplicate
     * vertices when splitting indexed primitives.
     */
    struct { 
       GLuint in;
       GLuint out;
    } vert_cache[ELT_TABLE_SIZE];
-      
 
    GLuint vertex_size;
    GLubyte *dstbuf;
-   GLubyte *dstptr;		/* dstptr == dstbuf + dstelt_max * vertsize */
-   GLuint dstbuf_size;	/* in vertices */
-   GLuint dstbuf_nr;		/* count of emitted vertices, also the
-				 * largest value in dstelt.  Our
-				 * MaxIndex.
-				 */
+   GLubyte *dstptr;     /**< dstptr == dstbuf + dstelt_max * vertsize */
+   GLuint dstbuf_size;  /**< in vertices */
+   GLuint dstbuf_nr;    /**< count of emitted vertices, also the largest value
+                         * in dstelt.  Our MaxIndex.
+                         */
 
    GLuint *dstelt;
    GLuint dstelt_nr;
@@ -102,32 +102,19 @@ struct copy_context {
 };
 
 
-static GLuint type_size( GLenum type )
-{
-   switch(type) {
-   case GL_BYTE: return sizeof(GLbyte);
-   case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
-   case GL_SHORT: return sizeof(GLshort);
-   case GL_UNSIGNED_SHORT: return sizeof(GLushort);
-   case GL_INT: return sizeof(GLint);
-   case GL_UNSIGNED_INT: return sizeof(GLuint);
-   case GL_FLOAT: return sizeof(GLfloat);
-   case GL_DOUBLE: return sizeof(GLdouble);
-   default: return 0;
-   }
-}
-
 static GLuint attr_size( const struct gl_client_array *array )
 {
-   return array->Size * type_size(array->Type);
+   return array->Size * _mesa_sizeof_type(array->Type);
 }
 
 
-/* Starts returning true slightly before the buffer fills, to ensure
+/**
+ * Starts returning true slightly before the buffer fills, to ensure
  * that there is sufficient room for any remaining vertices to finish
  * off the prim:
  */
-static GLboolean check_flush( struct copy_context *copy )
+static GLboolean
+check_flush( struct copy_context *copy )
 {
    GLenum mode = copy->dstprim[copy->dstprim_nr].mode; 
 
@@ -145,7 +132,9 @@ static GLboolean check_flush( struct copy_context *copy )
    return GL_FALSE;
 }
 
-static void flush( struct copy_context *copy )
+
+static void
+flush( struct copy_context *copy )
 {
    GLuint i;
 
@@ -175,8 +164,11 @@ static void flush( struct copy_context *copy )
 }
 
 
-
-static void begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
+/**
+ * Called at the beginning of each primitive during replay.
+ */
+static void
+begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
 {
    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
 
@@ -187,10 +179,12 @@ static void begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag
 }
 
 
-/* Use a hashtable to attempt to identify recently-emitted vertices
+/**
+ * Use a hashtable to attempt to identify recently-emitted vertices
  * and avoid re-emitting them.
  */
-static GLuint elt(struct copy_context *copy, GLuint elt_idx)
+static GLuint
+elt(struct copy_context *copy, GLuint elt_idx)
 {
    GLuint elt = copy->srcelt[elt_idx];
    GLuint slot = elt & (ELT_TABLE_SIZE-1);
@@ -222,7 +216,6 @@ static GLuint elt(struct copy_context *copy, GLuint elt_idx)
 	       _mesa_printf("%x ", f[j]);
 	    _mesa_printf("\n");
 	 }
-	       
       }
 
       copy->vert_cache[slot].in = elt;
@@ -230,9 +223,8 @@ static GLuint elt(struct copy_context *copy, GLuint elt_idx)
       copy->dstptr += copy->vertex_size;
 
       assert(csr == copy->dstptr);
-      assert(copy->dstptr == (copy->dstbuf + 
-				    copy->dstbuf_nr * 
-				    copy->vertex_size));
+      assert(copy->dstptr == (copy->dstbuf +
+                              copy->dstbuf_nr * copy->vertex_size));
    }
 /*    else */
 /*       _mesa_printf("  --> reuse vertex\n"); */
@@ -242,7 +234,12 @@ static GLuint elt(struct copy_context *copy, GLuint elt_idx)
    return check_flush(copy);
 }
 
-static void end( struct copy_context *copy, GLboolean end_flag )
+
+/**
+ * Called at end of each primitive during replay.
+ */
+static void
+end( struct copy_context *copy, GLboolean end_flag )
 {
    struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
 
@@ -257,8 +254,8 @@ static void end( struct copy_context *copy, GLboolean end_flag )
 }
 
 
-
-static void replay_elts( struct copy_context *copy )
+static void
+replay_elts( struct copy_context *copy )
 {
    GLuint i, j, k;
    GLboolean split;
@@ -362,7 +359,8 @@ static void replay_elts( struct copy_context *copy )
 }
 
 
-static void replay_init( struct copy_context *copy )
+static void
+replay_init( struct copy_context *copy )
 {
    GLcontext *ctx = copy->ctx;
    GLuint i;
@@ -388,10 +386,7 @@ static void replay_init( struct copy_context *copy )
 	 copy->vertex_size += attr_size(copy->array[i]);
       
 	 if (vbo->Name && !vbo->Pointer) 
-	    ctx->Driver.MapBuffer(ctx,
-				  GL_ARRAY_BUFFER_ARB, 
-				  GL_WRITE_ONLY, /* XXX */
-				  vbo);
+	    ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY, vbo);
 
 	 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
 						 copy->array[i]->Ptr);
@@ -405,12 +400,11 @@ static void replay_init( struct copy_context *copy )
     * do it internally.
     */
    if (copy->ib->obj->Name && !copy->ib->obj->Pointer) 
-      ctx->Driver.MapBuffer(ctx, 
-			    GL_ARRAY_BUFFER_ARB, /* XXX */
-			    GL_WRITE_ONLY, /* XXX */
+      ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY,
 			    copy->ib->obj);
 
-   srcptr = (const GLubyte *)ADD_POINTERS(copy->ib->obj->Pointer, copy->ib->ptr);
+   srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer,
+                                           copy->ib->ptr);
 
    switch (copy->ib->type) {
    case GL_UNSIGNED_BYTE:
@@ -434,7 +428,6 @@ static void replay_init( struct copy_context *copy )
       copy->srcelt = (const GLuint *)srcptr;
       break;
    }
-   
 
    /* Figure out the maximum allowed vertex buffer size:
     */
@@ -449,8 +442,7 @@ static void replay_init( struct copy_context *copy )
     *
     * XXX:  This should be a VBO!
     */
-   copy->dstbuf = _mesa_malloc(copy->dstbuf_size * 
-			       copy->vertex_size);   
+   copy->dstbuf = _mesa_malloc(copy->dstbuf_size * copy->vertex_size);   
    copy->dstptr = copy->dstbuf;
 
    /* Setup new vertex arrays to point into the output buffer: 
@@ -467,7 +459,7 @@ static void replay_init( struct copy_context *copy )
       dst->Ptr = copy->dstbuf + offset;
       dst->Enabled = GL_TRUE;
       dst->Normalized = src->Normalized; 
-      dst->BufferObj = ctx->Array.NullBufferObj;
+      dst->BufferObj = ctx->Shared->NullBufferObj;
       dst->_MaxElement = copy->dstbuf_size; /* may be less! */
 
       offset += copy->varying[i].size;
@@ -487,12 +479,16 @@ static void replay_init( struct copy_context *copy )
     */
    copy->dstib.count = 0;	/* duplicates dstelt_nr */
    copy->dstib.type = GL_UNSIGNED_INT;
-   copy->dstib.obj = ctx->Array.NullBufferObj;
+   copy->dstib.obj = ctx->Shared->NullBufferObj;
    copy->dstib.ptr = copy->dstelt;
 }
 
 
-static void replay_finish( struct copy_context *copy )
+/**
+ * Free up everything allocated during split/replay.
+ */
+static void
+replay_finish( struct copy_context *copy )
 {
    GLcontext *ctx = copy->ctx;
    GLuint i;
@@ -502,25 +498,26 @@ static void replay_finish( struct copy_context *copy )
    _mesa_free(copy->translated_elt_buf);
    _mesa_free(copy->dstbuf);
    _mesa_free(copy->dstelt);
-   
+
    /* Unmap VBO's 
     */
    for (i = 0; i < copy->nr_varying; i++) {
       struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
-
       if (vbo->Name && vbo->Pointer) 
-	 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo);
+	 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, vbo);
    }
 
    /* Unmap index buffer:
     */
    if (copy->ib->obj->Name && copy->ib->obj->Pointer) {
-      ctx->Driver.UnmapBuffer(ctx, 
-			      GL_ARRAY_BUFFER_ARB, /* XXX */
-			      copy->ib->obj);
+      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, copy->ib->obj);
    }
 }
 
+
+/**
+ * Split VBO into smaller pieces, draw the pieces.
+ */
 void vbo_split_copy( GLcontext *ctx,
 		     const struct gl_client_array *arrays[],
 		     const struct _mesa_prim *prim,
@@ -546,13 +543,11 @@ void vbo_split_copy( GLcontext *ctx,
    copy.draw = draw;
    copy.limits = limits;
 
-
    /* Clear the vertex cache:
     */
    for (i = 0; i < ELT_TABLE_SIZE; i++)
       copy.vert_cache[i].in = ~0;
 
-
    replay_init(&copy);
    replay_elts(&copy);
    replay_finish(&copy);
diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c
index fbc856e93b0..3ed6b34fbf0 100644
--- a/src/mesa/vbo/vbo_split_inplace.c
+++ b/src/mesa/vbo/vbo_split_inplace.c
@@ -221,7 +221,7 @@ static void split_prims( struct split_context *split)
 
 	 ib.count = count;
 	 ib.type = GL_UNSIGNED_INT;
-	 ib.obj = split->ctx->Array.NullBufferObj;
+	 ib.obj = split->ctx->Shared->NullBufferObj;
 	 ib.ptr = elts;
 	    
 	 tmpprim = *prim;