10 files changed, 242 insertions, 227 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 3cf0ea0991d..c55c5c426e0 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -130,9 +130,10 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj)
  * Allocate space for and store data in a buffer object.  Any data that was
  * previously stored in the buffer object is lost.  If data is NULL,
  * memory will be allocated, but no copy will occur.
- * Called via glBufferDataARB().
+ * Called via ctx->Driver.BufferData().
+ * \return GL_TRUE for success, GL_FALSE if out of memory
  */
-static void
+static GLboolean
 intel_bufferobj_data(GLcontext * ctx,
                      GLenum target,
                      GLsizeiptrARB size,
@@ -167,15 +168,19 @@ intel_bufferobj_data(GLcontext * ctx,
 	 if (intel_obj->sys_buffer != NULL) {
 	    if (data != NULL)
 	       memcpy(intel_obj->sys_buffer, data, size);
-	    return;
+	    return GL_TRUE;
 	 }
       }
 #endif
       intel_bufferobj_alloc_buffer(intel, intel_obj);
+      if (!intel_obj->buffer)
+         return GL_FALSE;
 
       if (data != NULL)
 	 dri_bo_subdata(intel_obj->buffer, 0, size, data);
    }
+
+   return GL_TRUE;
 }
 
 
@@ -267,6 +272,9 @@ intel_bufferobj_map(GLcontext * ctx,
    }
 
    obj->Pointer = intel_obj->buffer->virtual;
+   obj->Length = obj->Size;
+   obj->Offset = 0;
+
    return obj->Pointer;
 }
 
@@ -288,7 +296,7 @@ intel_bufferobj_map(GLcontext * ctx,
  */
 static void *
 intel_bufferobj_map_range(GLcontext * ctx,
-			  GLenum target, GLsizei offset, GLsizeiptr length,
+			  GLenum target, GLintptr offset, GLsizeiptr length,
 			  GLbitfield access, struct gl_buffer_object *obj)
 {
    struct intel_context *intel = intel_context(ctx);
@@ -296,6 +304,13 @@ intel_bufferobj_map_range(GLcontext * ctx,
 
    assert(intel_obj);
 
+   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
+    * internally uses our functions directly.
+    */
+   obj->Offset = offset;
+   obj->Length = length;
+   obj->AccessFlags = access;
+
    if (intel_obj->sys_buffer) {
       obj->Pointer = intel_obj->sys_buffer + offset;
       return obj->Pointer;
@@ -308,16 +323,9 @@ intel_bufferobj_map_range(GLcontext * ctx,
     * the batchbuffer so that GEM knows about the buffer access for later
     * syncing.
     */
-   if ((access & GL_MAP_WRITE_BIT) && !(access & GL_MAP_UNSYNCHRONIZED_BIT))
+   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT))
       intelFlush(ctx);
 
-   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
-    * internally uses our functions directly.
-    */
-   obj->Offset = offset;
-   obj->Length = length;
-   obj->AccessFlags = access;
-
    if (intel_obj->buffer == NULL) {
       obj->Pointer = NULL;
       return NULL;
@@ -341,19 +349,24 @@ intel_bufferobj_map_range(GLcontext * ctx,
     */
    if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
        drm_intel_bo_busy(intel_obj->buffer)) {
-      intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
-						   "range map",
-						   length, 64);
-      if (!(access & GL_MAP_READ_BIT) &&
-	  intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
-	 intel_obj->mapped_gtt = GL_TRUE;
+      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
+	 intel_obj->range_map_buffer = _mesa_malloc(length);
+	 obj->Pointer = intel_obj->range_map_buffer;
       } else {
-	 drm_intel_bo_map(intel_obj->range_map_bo,
-			  (access & GL_MAP_WRITE_BIT) != 0);
-	 intel_obj->mapped_gtt = GL_FALSE;
+	 intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
+						      "range map",
+						      length, 64);
+	 if (!(access & GL_MAP_READ_BIT) &&
+	     intel->intelScreen->kernel_exec_fencing) {
+	    drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
+	    intel_obj->mapped_gtt = GL_TRUE;
+	 } else {
+	    drm_intel_bo_map(intel_obj->range_map_bo,
+			     (access & GL_MAP_WRITE_BIT) != 0);
+	    intel_obj->mapped_gtt = GL_FALSE;
+	 }
+	 obj->Pointer = intel_obj->range_map_bo->virtual;
       }
-      obj->Pointer = intel_obj->range_map_bo->virtual;
       return obj->Pointer;
    }
 
@@ -370,6 +383,38 @@ intel_bufferobj_map_range(GLcontext * ctx,
    return obj->Pointer;
 }
 
+/* Ideally we'd use a BO to avoid taking up cache space for the temporary
+ * data, but FlushMappedBufferRange may be followed by further writes to
+ * the pointer, so we would have to re-map after emitting our blit, which
+ * would defeat the point.
+ */
+static void
+intel_bufferobj_flush_mapped_range(GLcontext *ctx, GLenum target,
+				   GLintptr offset, GLsizeiptr length,
+				   struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+   drm_intel_bo *temp_bo;
+
+   /* Unless we're in the range map using a temporary system buffer,
+    * there's no work to do.
+    */
+   if (intel_obj->range_map_buffer == NULL)
+      return;
+
+   temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64);
+
+   drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer);
+
+   intel_emit_linear_blit(intel,
+			  intel_obj->buffer, obj->Offset + offset,
+			  temp_bo, 0,
+			  length);
+
+   drm_intel_bo_unreference(temp_bo);
+}
+
 
 /**
  * Called via glUnmapBuffer().
@@ -385,6 +430,15 @@ intel_bufferobj_unmap(GLcontext * ctx,
    assert(obj->Pointer);
    if (intel_obj->sys_buffer != NULL) {
       /* always keep the mapping around. */
+   } else if (intel_obj->range_map_buffer != NULL) {
+      /* Since we've emitted some blits to buffers that will (likely) be used
+       * in rendering operations in other cache domains in this batch, emit a
+       * flush.  Once again, we wish for a domain tracker in libdrm to cover
+       * usage inside of a batchbuffer.
+       */
+      intel_batchbuffer_emit_mi_flush(intel->batch);
+      free(intel_obj->range_map_buffer);
+      intel_obj->range_map_buffer = NULL;
    } else if (intel_obj->range_map_bo != NULL) {
       if (intel_obj->mapped_gtt) {
 	 drm_intel_gem_bo_unmap_gtt(intel_obj->range_map_bo);
@@ -392,10 +446,6 @@ intel_bufferobj_unmap(GLcontext * ctx,
 	 drm_intel_bo_unmap(intel_obj->range_map_bo);
       }
 
-      /* We ignore the FLUSH_EXPLICIT bit and the calls associated with it.
-       * It would be a small win to support that, but for now we just copy
-       * the whole mapped range into place.
-       */
       intel_emit_linear_blit(intel,
 			     intel_obj->buffer, obj->Offset,
 			     intel_obj->range_map_bo, 0,
@@ -418,6 +468,8 @@ intel_bufferobj_unmap(GLcontext * ctx,
       }
    }
    obj->Pointer = NULL;
+   obj->Offset = 0;
+   obj->Length = 0;
 
    return GL_TRUE;
 }
@@ -523,6 +575,7 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions)
    functions->GetBufferSubData = intel_bufferobj_get_subdata;
    functions->MapBuffer = intel_bufferobj_map;
    functions->MapBufferRange = intel_bufferobj_map_range;
+   functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
    functions->UnmapBuffer = intel_bufferobj_unmap;
    functions->CopyBufferSubData = intel_bufferobj_copy_subdata;
 }
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
index 06a8ab9824c..bf3e08a3201 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
@@ -50,6 +50,7 @@ struct intel_buffer_object
                                    buffer object? */
 
    drm_intel_bo *range_map_bo;
+   void *range_map_buffer;
    unsigned int range_map_offset;
    GLsizei range_map_size;
 
diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index 630d2adab87..bce23724b38 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -27,25 +27,9 @@
  **************************************************************************/
 
 #include "main/glheader.h"
-#include "main/enums.h"
-#include "main/image.h"
 #include "main/mtypes.h"
-#include "main/arrayobj.h"
-#include "main/attrib.h"
-#include "main/blend.h"
-#include "main/bufferobj.h"
-#include "main/buffers.h"
-#include "main/depth.h"
-#include "main/enable.h"
-#include "main/macros.h"
-#include "main/matrix.h"
-#include "main/polygon.h"
-#include "main/texstate.h"
-#include "main/shaders.h"
-#include "main/stencil.h"
-#include "main/varray.h"
-#include "glapi/dispatch.h"
 #include "swrast/swrast.h"
+#include "drivers/common/meta.h"
 
 #include "intel_context.h"
 #include "intel_blit.h"
@@ -140,7 +124,7 @@ intelClear(GLcontext *ctx, GLbitfield mask)
     * buffer with it.
     */
    if (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) {
-      int color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS);
+      int color_bit = _mesa_ffs(mask & BUFFER_BITS_COLOR);
       if (color_bit != 0) {
 	 tri_mask |= blit_mask & (1 << (color_bit - 1));
 	 blit_mask &= ~(1 << (color_bit - 1));
@@ -186,7 +170,7 @@ intelClear(GLcontext *ctx, GLbitfield mask)
 	 }
 	 DBG("\n");
       }
-      meta_clear_tris(&intel->meta, tri_mask);
+      _mesa_meta_clear(&intel->ctx, tri_mask);
    }
 
    if (swrast_mask) {
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 46f1a7f7205..89f99f7ffdb 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -598,6 +598,7 @@ intelInitDriverFunctions(struct dd_function_table *functions)
    intelInitBufferFuncs(functions);
    intelInitPixelFuncs(functions);
    intelInitBufferObjectFuncs(functions);
+   intel_init_syncobj_functions(functions);
 }
 
 
@@ -640,6 +641,10 @@ intelInitContext(struct intel_context *intel,
       intel->maxBatchSize = BATCH_SZ;
 
    intel->bufmgr = intelScreen->bufmgr;
+
+   if (0) /* for debug */
+      drm_intel_bufmgr_set_debug(intel->bufmgr, 1);
+
    intel->ttm = intelScreen->ttm;
    if (intel->ttm) {
       int bo_reuse_mode;
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 0d9db5eb1da..03e7cf39d68 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -80,6 +80,12 @@ extern void intelFallback(struct intel_context *intel, GLuint bit,
 
 #define INTEL_MAX_FIXUP 64
 
+struct intel_sync_object {
+   struct gl_sync_object Base;
+
+   /** Batch associated with this sync object */
+   drm_intel_bo *bo;
+};
 
 /**
  * intel_context is derived from Mesa's context class: GLcontext.
@@ -470,6 +476,8 @@ extern void intelFlush(GLcontext * ctx);
 
 extern void intelInitDriverFunctions(struct dd_function_table *functions);
 
+void intel_init_syncobj_functions(struct dd_function_table *functions);
+
 
 /* ================================================================
  * intel_state.c:
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 7aee70f0a8f..2e61c556d85 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -36,6 +36,7 @@
 #define need_GL_ARB_occlusion_query
 #define need_GL_ARB_point_parameters
 #define need_GL_ARB_shader_objects
+#define need_GL_ARB_sync
 #define need_GL_ARB_vertex_array_object
 #define need_GL_ARB_vertex_program
 #define need_GL_ARB_vertex_shader
@@ -77,6 +78,7 @@ static const struct dri_extension card_extensions[] = {
    { "GL_ARB_multitexture",               NULL },
    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
    { "GL_ARB_point_sprite",               NULL },
+   { "GL_ARB_sync",                       GL_ARB_sync_functions },
    { "GL_ARB_texture_border_clamp",       NULL },
    { "GL_ARB_texture_cube_map",           NULL },
    { "GL_ARB_texture_env_add",            NULL },
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 540e7620a9d..b543a0bbc3e 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -93,19 +93,12 @@ static const GLubyte *map_pbo( GLcontext *ctx,
    return ADD_POINTERS(buf, bitmap);
 }
 
-static GLboolean test_bit( const GLubyte *src,
-			    GLuint bit )
+static GLboolean test_bit( const GLubyte *src, GLuint bit )
 {
    return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
 }
 
-static GLboolean test_msb_bit(const GLubyte *src, GLuint bit)
-{
-   return (src[bit/8] & (1<<(7 - (bit % 8)))) ? 1 : 0;
-}
-
-static void set_bit( GLubyte *dest,
-			  GLuint bit )
+static void set_bit( GLubyte *dest, GLuint bit )
 {
    dest[bit/8] |= 1 << (bit % 8);
 }
@@ -365,9 +358,7 @@ intel_texture_bitmap(GLcontext * ctx,
    GLuint texname;
    GLfloat vertices[4][4];
    GLint old_active_texture;
-   GLubyte *unpacked_bitmap;
    GLubyte *a8_bitmap;
-   int x, y;
    GLfloat dst_z;
 
    /* We need a fragment program for the KIL effect */
@@ -427,22 +418,16 @@ intel_texture_bitmap(GLcontext * ctx,
       return GL_FALSE;
    }
 
-   /* Convert the A1 bitmap to an A8 format suitable for glTexImage */
    if (unpack->BufferObj->Name) {
       bitmap = map_pbo(ctx, width, height, unpack, bitmap);
       if (bitmap == NULL)
 	 return GL_TRUE;	/* even though this is an error, we're done */
    }
-   unpacked_bitmap = _mesa_unpack_bitmap(width, height, bitmap,
-					 unpack);
+
+   /* Convert the A1 bitmap to an A8 format suitable for glTexImage */
    a8_bitmap = _mesa_calloc(width * height);
-   for (y = 0; y < height; y++) {
-      for (x = 0; x < width; x++) {
-	 if (test_msb_bit(unpacked_bitmap, ALIGN(width, 8) * y + x))
-	    a8_bitmap[y * width + x] = 0xff;
-      }
-   }
-   _mesa_free(unpacked_bitmap);
+   _mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff);
+
    if (unpack->BufferObj->Name) {
       /* done with PBO so unmap it now */
       ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
index ca796b36559..07ca8f7ddb5 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@@ -26,18 +26,13 @@
  **************************************************************************/
 
 #include "main/glheader.h"
-#include "main/enums.h"
 #include "main/image.h"
 #include "main/state.h"
 #include "main/mtypes.h"
-#include "main/macros.h"
 #include "drivers/common/meta.h"
 
-#include "intel_screen.h"
 #include "intel_context.h"
-#include "intel_batchbuffer.h"
 #include "intel_buffers.h"
-#include "intel_blit.h"
 #include "intel_regions.h"
 #include "intel_pixel.h"
 
@@ -244,5 +239,6 @@ intelCopyPixels(GLcontext * ctx,
    if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
       return;
 
+   /* this will use swrast if needed */
    _mesa_meta_copy_pixels(ctx, srcx, srcy, width, height, destx, desty, type);
 }
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
index a6b6824164a..8c113881d6b 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -29,8 +29,6 @@
 #include "main/enums.h"
 #include "main/image.h"
 #include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/bufferobj.h"
 #include "main/teximage.h"
 #include "main/texenv.h"
 #include "main/texobj.h"
@@ -41,163 +39,21 @@
 #include "main/enable.h"
 #include "main/buffers.h"
 #include "main/fbobject.h"
-#include "main/renderbuffer.h"
 #include "main/depth.h"
 #include "main/hash.h"
 #include "main/blend.h"
-#include "main/viewport.h"
-#include "glapi/dispatch.h"
-#include "swrast/swrast.h"
+#include "drivers/common/meta.h"
 
-#include "intel_screen.h"
 #include "intel_context.h"
 #include "intel_batchbuffer.h"
 #include "intel_blit.h"
 #include "intel_buffers.h"
 #include "intel_regions.h"
 #include "intel_pixel.h"
-#include "intel_buffer_objects.h"
 #include "intel_fbo.h"
 
-static GLboolean
-intel_texture_drawpixels(GLcontext * ctx,
-			 GLint x, GLint y,
-			 GLsizei width, GLsizei height,
-			 GLenum format,
-			 GLenum type,
-			 const struct gl_pixelstore_attrib *unpack,
-			 const GLvoid *pixels)
-{
-   struct intel_context *intel = intel_context(ctx);
-   GLuint texname;
-   GLfloat vertices[4][4];
-   GLfloat z;
-   GLint old_active_texture;
-   GLenum internalFormat;
-
-   /* We're going to mess with texturing with no regard to existing texture
-    * state, so if there is some set up we have to bail.
-    */
-   if (ctx->Texture._EnabledUnits != 0) {
-      if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr, "glDrawPixels() fallback: texturing enabled\n");
-      return GL_FALSE;
-   }
-
-   /* Can't do textured DrawPixels with a fragment program, unless we were
-    * to generate a new program that sampled our texture and put the results
-    * in the fragment color before the user's program started.
-    */
-   if (ctx->FragmentProgram.Enabled) {
-      if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr, "glDrawPixels() fallback: fragment program enabled\n");
-      return GL_FALSE;
-   }
-
-   /* We don't have a way to generate fragments with stencil values which
-    * will set the resulting stencil value.
-    */
-   if (format == GL_STENCIL_INDEX || format == GL_DEPTH_STENCIL)
-      return GL_FALSE;
-
-   /* Check that we can load in a texture this big. */
-   if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
-       height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
-      if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr, "glDrawPixels() fallback: bitmap too large (%dx%d)\n",
-		 width, height);
-      return GL_FALSE;
-   }
-
-   /* To do DEPTH_COMPONENT, we would need to change our setup to not draw to
-    * the color buffer, and sample the texture values into the fragment depth
-    * in a program.
-    */
-   if (format == GL_DEPTH_COMPONENT) {
-      if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr,
-		 "glDrawPixels() fallback: format == GL_DEPTH_COMPONENT\n");
-      return GL_FALSE;
-   }
-
-   if (!ctx->Extensions.ARB_texture_non_power_of_two &&
-       (!is_power_of_two(width) || !is_power_of_two(height))) {
-      if (INTEL_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr,
-		 "glDrawPixels() fallback: NPOT texture\n");
-      return GL_FALSE;
-   }
-
-   _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
-		    GL_CURRENT_BIT | GL_VIEWPORT_BIT);
-   _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
-
-   /* XXX: pixel store stuff */
-   _mesa_Disable(GL_POLYGON_STIPPLE);
-
-   old_active_texture = ctx->Texture.CurrentUnit;
-   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
-   _mesa_Enable(GL_TEXTURE_2D);
-   _mesa_GenTextures(1, &texname);
-   _mesa_BindTexture(GL_TEXTURE_2D, texname);
-   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-   _mesa_TexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
-   if (type == GL_ALPHA)
-      internalFormat = GL_ALPHA;
-   else
-      internalFormat = GL_RGBA;
-   _mesa_TexImage2D(GL_TEXTURE_2D, 0, internalFormat, width, height, 0, format,
-		    type, pixels);
-
-   meta_set_passthrough_transform(&intel->meta);
-
-   /* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */
-   z = -1.0 + 2.0 * ctx->Current.RasterPos[2];
-
-   /* RasterPos[2] already takes into account the DepthRange mapping. */
-   _mesa_DepthRange(0.0, 1.0);
-
-   /* Create the vertex buffer based on the current raster pos.  The x and y
-    * we're handed are ctx->Current.RasterPos[0,1] rounded to integers.
-    * We also apply the depth.  However, the W component is already multiplied
-    * into ctx->Current.RasterPos[0,1,2] and we can ignore it at this point.
-    */
-   vertices[0][0] = x;
-   vertices[0][1] = y;
-   vertices[0][2] = z;
-   vertices[0][3] = 1.0;
-   vertices[1][0] = x + width * ctx->Pixel.ZoomX;
-   vertices[1][1] = y;
-   vertices[1][2] = z;
-   vertices[1][3] = 1.0;
-   vertices[2][0] = x + width * ctx->Pixel.ZoomX;
-   vertices[2][1] = y + height * ctx->Pixel.ZoomY;
-   vertices[2][2] = z;
-   vertices[2][3] = 1.0;
-   vertices[3][0] = x;
-   vertices[3][1] = y + height * ctx->Pixel.ZoomY;
-   vertices[3][2] = z;
-   vertices[3][3] = 1.0;
-
-   _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices);
-   _mesa_Enable(GL_VERTEX_ARRAY);
-   meta_set_default_texrect(&intel->meta);
-
-   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
-
-   meta_restore_texcoords(&intel->meta);
-   meta_restore_transform(&intel->meta);
-
-   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
-   _mesa_PopClientAttrib();
-   _mesa_PopAttrib();
-
-   _mesa_DeleteTextures(1, &texname);
-
-   return GL_TRUE;
-}
 
+/** XXX compare perf of this vs. _mesa_meta_draw_pixels(STENCIL) */
 static GLboolean
 intel_stencil_drawpixels(GLcontext * ctx,
 			 GLint x, GLint y,
@@ -404,17 +260,10 @@ intelDrawPixels(GLcontext * ctx,
                 const struct gl_pixelstore_attrib *unpack,
                 const GLvoid * pixels)
 {
-   if (intel_texture_drawpixels(ctx, x, y, width, height, format, type,
-				unpack, pixels))
-      return;
-
    if (intel_stencil_drawpixels(ctx, x, y, width, height, format, type,
 				unpack, pixels))
       return;
 
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
-
-   _swrast_DrawPixels(ctx, x, y, width, height, format, type,
-		      unpack, pixels);
+   _mesa_meta_draw_pixels(ctx, x, y, width, height, format, type,
+                          unpack, pixels);
 }
diff --git a/src/mesa/drivers/dri/intel/intel_syncobj.c b/src/mesa/drivers/dri/intel/intel_syncobj.c
new file mode 100644
index 00000000000..1286fe929bc
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_syncobj.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <[email protected]>
+ *
+ */
+
+/** @file intel_syncobj.c
+ *
+ * Support for ARB_sync
+ *
+ * ARB_sync is implemented by flushing the current batchbuffer and keeping a
+ * reference on it.  We can then check for completion or wait for compeltion
+ * using the normal buffer object mechanisms.  This does mean that if an
+ * application is using many sync objects, it will emit small batchbuffers
+ * which may end up being a significant overhead.  In other tests of removing
+ * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant
+ * performance bottleneck, though.
+ */
+
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+static struct gl_sync_object *
+intel_new_sync_object(GLcontext *ctx, GLuint id)
+{
+   struct intel_sync_object *sync;
+
+   sync = _mesa_calloc(sizeof(struct intel_sync_object));
+
+   return &sync->Base;
+}
+
+static void
+intel_delete_sync_object(GLcontext *ctx, struct gl_sync_object *s)
+{
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   drm_intel_bo_unreference(sync->bo);
+   _mesa_free(sync);
+}
+
+static void
+intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s,
+	       GLenum condition, GLbitfield flags)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   sync->bo = intel->batch->buf;
+   drm_intel_bo_reference(sync->bo);
+
+   intelFlush(ctx);
+}
+
+/* We ignore the user-supplied timeout.  This is weaselly -- we're allowed to
+ * round to an implementation-dependent accuracy, and right now our
+ * implementation "rounds" to the wait-forever value.
+ *
+ * The fix would be a new kernel function to do the GTT transition with a
+ * timeout.
+ */
+static void intel_client_wait_sync(GLcontext *ctx, struct gl_sync_object *s,
+				 GLbitfield flags, GLuint64 timeout)
+{
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   if (sync->bo) {
+      drm_intel_bo_wait_rendering(sync->bo);
+      s->StatusFlag = 1;
+      drm_intel_bo_unreference(sync->bo);
+      sync->bo = NULL;
+   }
+}
+
+/* We have nothing to do for WaitSync.  Our GL command stream is sequential,
+ * so given that the sync object has already flushed the batchbuffer,
+ * any batchbuffers coming after this waitsync will naturally not occur until
+ * the previous one is done.
+ */
+static void intel_server_wait_sync(GLcontext *ctx, struct gl_sync_object *s,
+				 GLbitfield flags, GLuint64 timeout)
+{
+}
+
+static void intel_check_sync(GLcontext *ctx, struct gl_sync_object *s)
+{
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   if (sync->bo && drm_intel_bo_busy(sync->bo)) {
+      drm_intel_bo_unreference(sync->bo);
+      sync->bo = NULL;
+      s->StatusFlag = 1;
+   }
+}
+
+void intel_init_syncobj_functions(struct dd_function_table *functions)
+{
+   functions->NewSyncObject = intel_new_sync_object;
+   functions->DeleteSyncObject = intel_delete_sync_object;
+   functions->FenceSync = intel_fence_sync;
+   functions->CheckSync = intel_check_sync;
+   functions->ClientWaitSync = intel_client_wait_sync;
+   functions->ServerWaitSync = intel_server_wait_sync;
+}