5 files changed, 69 insertions, 117 deletions
diff --git a/src/mesa/swrast/s_aatritemp.h b/src/mesa/swrast/s_aatritemp.h
index 91d4f7a10ab..77b3ae6ec7a 100644
--- a/src/mesa/swrast/s_aatritemp.h
+++ b/src/mesa/swrast/s_aatritemp.h
@@ -181,13 +181,20 @@
       const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS];
       const GLfloat dxdy = majDx / majDy;
       const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F;
-      GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
       GLint iy;
-      for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span)
+#endif
+      for (iy = iyMin; iy < iyMax; iy++) {
+         GLfloat x = pMin[0] - (yMin - iy) * dxdy;
          GLint ix, startX = (GLint) (x - xAdj);
          GLuint count;
          GLfloat coverage = 0.0F;
 
+#ifdef _OPENMP
+         /* each thread needs to use a different (global) SpanArrays variable */
+         span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num();
+#endif
          /* skip over fragments with zero coverage */
          while (startX < MAX_WIDTH) {
             coverage = compute_coveragef(pMin, pMid, pMax, startX, iy);
@@ -228,13 +235,12 @@
             coverage = compute_coveragef(pMin, pMid, pMax, ix, iy);
          }
          
-         if (ix <= startX)
-            continue;
-         
-         span.x = startX;
-         span.y = iy;
-         span.end = (GLuint) ix - (GLuint) startX;
-         _swrast_write_rgba_span(ctx, &span);
+         if (ix > startX) {
+            span.x = startX;
+            span.y = iy;
+            span.end = (GLuint) ix - (GLuint) startX;
+            _swrast_write_rgba_span(ctx, &span);
+         }
       }
    }
    else {
@@ -244,13 +250,20 @@
       const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS];
       const GLfloat dxdy = majDx / majDy;
       const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F;
-      GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
       GLint iy;
-      for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span)
+#endif
+      for (iy = iyMin; iy < iyMax; iy++) {
+         GLfloat x = pMin[0] - (yMin - iy) * dxdy;
          GLint ix, left, startX = (GLint) (x + xAdj);
          GLuint count, n;
          GLfloat coverage = 0.0F;
          
+#ifdef _OPENMP
+         /* each thread needs to use a different (global) SpanArrays variable */
+         span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num();
+#endif
          /* make sure we're not past the window edge */
          if (startX >= ctx->DrawBuffer->_Xmax) {
             startX = ctx->DrawBuffer->_Xmax - 1;
@@ -296,31 +309,30 @@
          ATTRIB_LOOP_END
 #endif
 
-         if (startX <= ix)
-            continue;
-
-         n = (GLuint) startX - (GLuint) ix;
+         if (startX > ix) {
+            n = (GLuint) startX - (GLuint) ix;
 
-         left = ix + 1;
+            left = ix + 1;
 
-         /* shift all values to the left */
-         /* XXX this is temporary */
-         {
-            SWspanarrays *array = span.array;
-            GLint j;
-            for (j = 0; j < (GLint) n; j++) {
-               array->coverage[j] = array->coverage[j + left];
-               COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
+            /* shift all values to the left */
+            /* XXX this is temporary */
+            {
+               SWspanarrays *array = span.array;
+               GLint j;
+               for (j = 0; j < (GLint) n; j++) {
+                  array->coverage[j] = array->coverage[j + left];
+                  COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
 #ifdef DO_Z
-               array->z[j] = array->z[j + left];
+                  array->z[j] = array->z[j + left];
 #endif
+               }
             }
-         }
 
-         span.x = left;
-         span.y = iy;
-         span.end = n;
-         _swrast_write_rgba_span(ctx, &span);
+            span.x = left;
+            span.y = iy;
+            span.end = n;
+            _swrast_write_rgba_span(ctx, &span);
+         }
       }
    }
 }
diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c
index def1531d7ff..792b528ee34 100644
--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -417,84 +417,6 @@ _swrast_validate_blend_func(struct gl_context *ctx, GLuint n, const GLubyte mask
    swrast->BlendFunc( ctx, n, mask, src, dst, chanType );
 }
 
-
-/**
- * Make sure we have texture image data for all the textures we may need
- * for subsequent rendering.
- */
-static void
-_swrast_validate_texture_images(struct gl_context *ctx)
-{
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   GLuint u;
-
-   if (!swrast->ValidateTextureImage || !ctx->Texture._EnabledUnits) {
-      /* no textures enabled, or no way to validate images! */
-      return;
-   }
-
-   for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) {
-      if (ctx->Texture.Unit[u]._ReallyEnabled) {
-         struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current;
-         ASSERT(texObj);
-         if (texObj) {
-            GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-            GLuint face;
-            for (face = 0; face < numFaces; face++) {
-               GLint lvl;
-               for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) {
-                  struct gl_texture_image *texImg = texObj->Image[face][lvl];
-                  if (texImg && !texImg->Data) {
-                     swrast->ValidateTextureImage(ctx, texObj, face, lvl);
-                     ASSERT(texObj->Image[face][lvl]->Data);
-                  }
-               }
-            }
-         }
-      }
-   }
-}
-
-
-/**
- * Free the texture image data attached to all currently enabled
- * textures.  Meant to be called by device drivers when transitioning
- * from software to hardware rendering.
- */
-void
-_swrast_eject_texture_images(struct gl_context *ctx)
-{
-   GLuint u;
-
-   if (!ctx->Texture._EnabledUnits) {
-      /* no textures enabled */
-      return;
-   }
-
-   for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) {
-      if (ctx->Texture.Unit[u]._ReallyEnabled) {
-         struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current;
-         ASSERT(texObj);
-         if (texObj) {
-            GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-            GLuint face;
-            for (face = 0; face < numFaces; face++) {
-               GLint lvl;
-               for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) {
-                  struct gl_texture_image *texImg = texObj->Image[face][lvl];
-                  if (texImg && texImg->Data) {
-                     _mesa_free_texmemory(texImg->Data);
-                     texImg->Data = NULL;
-                  }
-               }
-            }
-         }
-      }
-   }
-}
-
-
-
 static void
 _swrast_sleep( struct gl_context *ctx, GLbitfield new_state )
 {
@@ -640,7 +562,6 @@ _swrast_validate_derived( struct gl_context *ctx )
 
       if (swrast->NewState & (_NEW_TEXTURE | _NEW_PROGRAM)) {
          _swrast_update_texture_samplers( ctx );
-         _swrast_validate_texture_images(ctx);
       }
 
       if (swrast->NewState & (_NEW_COLOR | _NEW_PROGRAM))
@@ -772,6 +693,11 @@ _swrast_CreateContext( struct gl_context *ctx )
 {
    GLuint i;
    SWcontext *swrast = (SWcontext *)CALLOC(sizeof(SWcontext));
+#ifdef _OPENMP
+   const GLint maxThreads = omp_get_max_threads();
+#else
+   const GLint maxThreads = 1;
+#endif
 
    if (SWRAST_DEBUG) {
       _mesa_debug(ctx, "_swrast_CreateContext\n");
@@ -806,19 +732,25 @@ _swrast_CreateContext( struct gl_context *ctx )
    for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++)
       swrast->TextureSample[i] = NULL;
 
-   swrast->SpanArrays = MALLOC_STRUCT(sw_span_arrays);
+   /* SpanArrays is global and shared by all SWspan instances. However, when
+    * using multiple threads, it is necessary to have one SpanArrays instance
+    * per thread.
+    */
+   swrast->SpanArrays = (SWspanarrays *) MALLOC(maxThreads * sizeof(SWspanarrays));
    if (!swrast->SpanArrays) {
       FREE(swrast);
       return GL_FALSE;
    }
-   swrast->SpanArrays->ChanType = CHAN_TYPE;
+   for(i = 0; i < maxThreads; i++) {
+      swrast->SpanArrays[i].ChanType = CHAN_TYPE;
 #if CHAN_TYPE == GL_UNSIGNED_BYTE
-   swrast->SpanArrays->rgba = swrast->SpanArrays->rgba8;
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba8;
 #elif CHAN_TYPE == GL_UNSIGNED_SHORT
-   swrast->SpanArrays->rgba = swrast->SpanArrays->rgba16;
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba16;
 #else
-   swrast->SpanArrays->rgba = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0];
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].attribs[FRAG_ATTRIB_COL0];
 #endif
+   }
 
    /* init point span buffer */
    swrast->PointSpan.primitive = GL_POINT;
@@ -826,7 +758,10 @@ _swrast_CreateContext( struct gl_context *ctx )
    swrast->PointSpan.facing = 0;
    swrast->PointSpan.array = swrast->SpanArrays;
 
-   swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits *
+   /* TexelBuffer is also global and normally shared by all SWspan instances;
+    * when running with multiple threads, create one per thread.
+    */
+   swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
                                            MAX_WIDTH * 4 * sizeof(GLfloat));
    if (!swrast->TexelBuffer) {
       FREE(swrast->SpanArrays);
diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
index db102ac7946..9a91be39970 100644
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -212,10 +212,10 @@ interpolate_active_attribs(struct gl_context *ctx, SWspan *span, GLbitfield attr
 static INLINE void
 interpolate_int_colors(struct gl_context *ctx, SWspan *span)
 {
+#if CHAN_BITS != 32
    const GLuint n = span->end;
    GLuint i;
 
-#if CHAN_BITS != 32
    ASSERT(!(span->arrayMask & SPAN_RGBA));
 #endif
 
diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c
index 5bec71c057b..fa5093a3407 100644
--- a/src/mesa/swrast/s_stencil.c
+++ b/src/mesa/swrast/s_stencil.c
@@ -462,7 +462,8 @@ stencil_and_ztest_span(struct gl_context *ctx, SWspan *span, GLuint face)
     * Some fragments passed the stencil test, apply depth test to them
     * and apply Zpass and Zfail stencil ops.
     */
-   if (ctx->Depth.Test == GL_FALSE) {
+   if (ctx->Depth.Test == GL_FALSE ||
+       ctx->DrawBuffer->_DepthBuffer == NULL) {
       /*
        * No depth buffer, just apply zpass stencil function to active pixels.
        */
diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c
index 086ed0b33d7..80b9dff3cc2 100644
--- a/src/mesa/swrast/s_texcombine.c
+++ b/src/mesa/swrast/s_texcombine.c
@@ -48,7 +48,11 @@ typedef float (*float4_array)[4];
 static INLINE float4_array
 get_texel_array(SWcontext *swrast, GLuint unit)
 {
+#ifdef _OPENMP
+   return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num()));
+#else
    return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4);
+#endif
 }